ICU-2401 r20705 => tags/post-cvs2svn
X-SVN-Rev: 20712
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..2f62d8b
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,112 @@
+* text=auto !eol
+
+*.c text !eol
+*.cc text !eol
+*.classpath text !eol
+*.cpp text !eol
+*.css text !eol
+*.dsp text !eol
+*.dsw text !eol
+*.filters text !eol
+*.h text !eol
+*.htm text !eol
+*.html text !eol
+*.in text !eol
+*.java text !eol
+*.launch text !eol
+*.mak text !eol
+*.md text !eol
+*.MF text !eol
+*.mk text !eol
+*.pl text !eol
+*.pm text !eol
+*.project text !eol
+*.properties text !eol
+*.py text !eol
+*.rc text !eol
+*.sh text eol=lf
+*.sln text !eol
+*.stub text !eol
+*.txt text !eol
+*.ucm text !eol
+*.vcproj text !eol
+*.vcxproj text !eol
+*.xml text !eol
+*.xsl text !eol
+*.xslt text !eol
+Makefile text !eol
+configure text !eol
+LICENSE text !eol
+README text !eol
+
+*.bin -text
+*.brk -text
+*.cnv -text
+*.icu -text
+*.res -text
+*.nrm -text
+*.spp -text
+*.tri2 -text
+
+unicodetools/com/ibm/rbm/docs/images/TitleLogo_transparent.gif -text
+unicodetools/com/ibm/rbm/docs/images/arrow_bullet.gif -text
+unicodetools/com/ibm/rbm/docs/images/diamond_bullet.gif -text
+unicodetools/com/ibm/rbm/docs/images/ibm_logo_small_white.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/RBReporter.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/basic_file.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/basic_group.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/basic_resource.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/basic_translation.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/basic_untranslated.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/create_group.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/empty_group.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/empty_resource.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/empty_screen.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/empty_with_preferences.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/laf_metal.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/laf_motif.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/laf_windows.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/lookup_resource.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/main_page.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/menu_file.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/menu_file_export.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/menu_file_import.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/menu_help.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/menu_options.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/menu_popup_tree.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/new_baseclass.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/new_bundle.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/preferences_dialog.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/view_groups_bundle.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/view_groups_file.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/view_search.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/view_stats_bundle.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/view_stats_file.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/view_tree_basic.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/view_untrans_bundle.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/view_untrans_dialog0.gif -text
+unicodetools/com/ibm/rbm/docs/images/screenshots/view_untrans_file.gif -text
+unicodetools/com/ibm/rbm/docs/images/spacer.gif -text
+unicodetools/com/ibm/rbm/docs/images/template_l.gif -text
+unicodetools/com/ibm/rbm/docs/images/template_line.gif -text
+unicodetools/com/ibm/rbm/docs/images/template_ll.gif -text
+unicodetools/com/ibm/rbm/docs/images/template_u.gif -text
+unicodetools/com/ibm/rbm/docs/images/template_ul.gif -text
+unicodetools/com/ibm/rbm/gui/images/TitleLogo_transparent.gif -text
+unicodetools/com/ibm/rbm/gui/images/tree_icon_bundle.gif -text
+unicodetools/com/ibm/rbm/gui/images/tree_icon_country.gif -text
+unicodetools/com/ibm/rbm/gui/images/tree_icon_file.gif -text
+unicodetools/com/ibm/rbm/gui/images/tree_icon_group.gif -text
+unicodetools/com/ibm/rbm/gui/images/tree_icon_item.gif -text
+unicodetools/com/ibm/rbm/gui/images/tree_icon_language.gif -text
+unicodetools/com/ibm/rbm/gui/images/tree_icon_project.gif -text
+unicodetools/com/ibm/rbm/gui/images/tree_icon_variant.gif -text
+
+# The following file types are stored in Git-LFS.
+*.jar filter=lfs diff=lfs merge=lfs -text
+*.dat filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.gif filter=lfs diff=lfs merge=lfs -text
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..568ac8f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+unicodetools/com/ibm/rbm/lib
diff --git a/colprobe/Makefile.in b/colprobe/Makefile.in
new file mode 100755
index 0000000..c6beffd
--- /dev/null
+++ b/colprobe/Makefile.in
@@ -0,0 +1,81 @@
+## Makefile.in for ICU - extra/colprobe
+## Copyright (c) 2001, International Business Machines Corporation and
+## others. All Rights Reserved.
+
+## Source directory information
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+
+top_builddir = ../..
+
+include $(top_builddir)/icudefs.mk
+
+## Build directory information
+subdir = extra/colprobe
+
+## Extra files to remove for 'make clean'
+CLEANFILES = *~ $(DEPS)
+
+## Target information
+TARGET = colprobe
+LONGNAME = longname
+
+CPPFLAGS += -I$(top_srcdir)/common -I$(top_srcdir)/i18n -I$(top_srcdir)/tools/toolutil -I$(top_srcdir)/io
+LIBS = $(LIBICUI18N) $(LIBICUUC) $(LIBUSTDIO) $(LIBICUTOOLUTIL) $(DEFAULT_LIBS) $(LIB_M)
+
+OBJECTS = colprobeNew.o line.o sortedlines.o strengthprobe.o uprinter.o
+LONGNAME_OBJ = longname.o
+
+DEPS = $(OBJECTS:.o=.d)
+LONGNAME_DEPS = $(LONGNAME_OBJ:.o=.d)
+
+## List of phony targets
+.PHONY : all all-local install install-local clean clean-local \
+distclean distclean-local dist dist-local check check-local
+
+## Clear suffix list
+.SUFFIXES :
+
+## List of standard targets
+all: all-local
+install: install-local
+clean: clean-local
+distclean : distclean-local
+dist: dist-local
+check: all check-local
+
+all-local: $(TARGET)
+
+install-local:
+
+dist-local:
+
+clean-local:
+ test -z "$(CLEANFILES)" || $(RMV) $(CLEANFILES)
+ $(RMV) $(OBJECTS) $(TARGET)
+
+distclean-local: clean-local
+ $(RMV) Makefile
+
+check-local: all-local
+
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ cd $(top_builddir) \
+ && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
+
+$(TARGET) : $(OBJECTS)
+ $(LINK.cc) -o $@ $^ $(LIBS)
+
+$(LONGNAME) : $(LONGNAME_OBJ)
+ $(LINK.cc) -o $@ $^ $(LIBS)
+
+invoke:
+ ICU_DATA=$${ICU_DATA:-$(top_builddir)/data/} TZ=PST8PDT $(INVOKE) $(INVOCATION)
+
+ifeq (,$(MAKECMDGOALS))
+-include $(DEPS)
+else
+ifneq ($(patsubst %clean,,$(MAKECMDGOALS)),)
+-include $(DEPS)
+endif
+endif
diff --git a/colprobe/colprobe.cpp b/colprobe/colprobe.cpp
new file mode 100755
index 0000000..d1751be
--- /dev/null
+++ b/colprobe/colprobe.cpp
@@ -0,0 +1,1730 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File colprobe.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 03/18/2003 weiv Creation.
+*******************************************************************************
+*/
+
+#include "uoptions.h"
+#include "unicode/ucol.h"
+#include "unicode/ucoleitr.h"
+#include "unicode/ures.h"
+#include "unicode/uniset.h"
+#include "unicode/usetiter.h"
+#include "unicode/ustring.h"
+#include "unicode/uchar.h"
+#include "unicode/uscript.h"
+#include "uprops.h"
+#include "hash.h"
+#include "ucol_imp.h"
+
+#include "unicode/ustdio.h"
+#include "unicode/utrans.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <io.h>
+#include <fcntl.h>
+
+#include "colprobe.h"
+
+
+#ifdef WIN32
+#include <windows.h>
+#else
+//
+// Stubs for Windows API functions when building on UNIXes.
+//
+typedef int DWORD;
+inline int CompareStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;};
+#include <sys/time.h>
+unsigned long timeGetTime() {
+    // UNIX stand-in for the Win32 function: wall-clock time in milliseconds.
+    struct timeval now;
+    gettimeofday(&now, 0);
+    const unsigned long secondsAsMillis = now.tv_sec * 1000; // wrap-around is accepted
+    return secondsAsMillis + now.tv_usec / 1000;
+};
+inline int LCMapStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;};
+const int LCMAP_SORTKEY = 0;
+#define MAKELCID(a,b) 0
+const int SORT_DEFAULT = 0;
+#endif
+
+#include "line.h"
+
+static UBool gVerbose = FALSE;
+static UBool gDebug = FALSE;
+static UBool gQuiet = FALSE;
+static UBool gExemplar = FALSE;
+
+DWORD gWinLCID;
+int gCount;
+Line **gICULines;
+UCollator *gCol;
+UCollator *gUCA;
+Line source; // scratch line: left-hand operand assembled by trySwamps()/trySwamped()
+Line target; // scratch line: right-hand operand assembled by trySwamps()/trySwamped()
+Line *gSource = &source;
+Line *gTarget = &target; // restored: the "&target;" text had been decoded as an HTML entity
+Hashtable gElements(FALSE);
+Hashtable gExpansions(FALSE);
+CompareFn gComparer;
+
+const UChar separatorChar = 0x0030;
+
+UFILE *out = NULL;
+UFILE *err = NULL;
+UFILE *log = NULL;
+
+const char *progName = "colprobe";
+
+const char *gLocale = NULL;
+//char platform[256];
+int32_t platformIndex = -1;
+int32_t gPlatformNo = 0;
+int32_t gPlatformIndexes[10];
+int32_t gLocaleNo = 0;
+const char* gLocales[100];
+UBool gRulesStdin = FALSE;
+
+enum { // positions of the entries in options[] below
+    HELP1 = 0,
+    HELP2 = 1,
+    VERBOSE = 2,
+    QUIET = 3,
+    VERSION = 4,
+    ICUDATADIR = 5,
+    COPYRIGHT = 6,
+    LOCALE = 7,
+    PLATFORM = 8,
+    DEBUG = 9,
+    EXEMPLAR = 10,
+    RULESSTDIN = 11
+};
+
+UOption options[]={ // keep the order in step with the enum above
+    UOPTION_HELP_H,                                       // HELP1
+    UOPTION_HELP_QUESTION_MARK,                           // HELP2
+    UOPTION_VERBOSE,                                      // VERBOSE
+    UOPTION_QUIET,                                        // QUIET
+    UOPTION_VERSION,                                      // VERSION
+    UOPTION_ICUDATADIR,                                   // ICUDATADIR
+    UOPTION_COPYRIGHT,                                    // COPYRIGHT
+    UOPTION_DEF("locale", 'l', UOPT_REQUIRES_ARG),        // LOCALE
+    UOPTION_DEF("platform", 'p', UOPT_REQUIRES_ARG),      // PLATFORM
+    UOPTION_DEF("debug", 'D', UOPT_NO_ARG),               // DEBUG
+    UOPTION_DEF("exemplar", 'E', UOPT_NO_ARG),            // EXEMPLAR
+    UOPTION_DEF("rulesstdin", 'R', UOPT_NO_ARG)           // RULESSTDIN
+};
+
+int Winstrcmp(const void *a, const void *b) { // qsort-style comparer backed by CompareStringW
+    Line *lhs = *(Line **)a;
+    Line *rhs = *(Line **)b;
+    gCount++; // global tally of comparisons
+    // CompareStringW reports 1/2/3 for less/equal/greater (0 on failure);
+    // subtracting 2 maps that onto the negative/zero/positive convention.
+    return CompareStringW(gWinLCID, 0, lhs->name, lhs->len, rhs->name, rhs->len) - 2;
+}
+
+int ICUstrcmp(const void *a, const void *b) { // qsort-style comparer backed by the collator gCol
+    Line *lhs = *(Line **)a;
+    Line *rhs = *(Line **)b;
+    gCount++; // global tally of comparisons
+    switch(ucol_strcoll(gCol, lhs->name, lhs->len, rhs->name, rhs->len)) {
+        case UCOL_LESS:    return -1;
+        case UCOL_GREATER: return +1;
+        default:           return 0;
+    }
+}
+
+struct { // table of the platforms that can be named on the command line
+ const char* name; // text that addPlatform() matches with strcmp()
+ CompareFn comparer; // comparison function registered under that name
+} platforms[] = {
+ { "icu", ICUstrcmp },
+ { "win", Winstrcmp}
+};
+
+
+void deleteLineElement(void *line) { // releases a Line that is handed over as an untyped pointer
+    delete static_cast<Line *>(line);
+}
+
+void stringToLower(char *string) {
+    // Lower-cases the NUL-terminated string in place. The unsigned char cast
+    // keeps tolower() defined for bytes >= 0x80 when char is signed; walking
+    // the pointer avoids re-running strlen() on every iteration.
+    for(; *string != 0; string++) { *string = (char)tolower((unsigned char)*string); }
+}
+
+void usage(const char *name) { // prints the one-line command synopsis for program `name` to `out`
+ u_fprintf(out, "Usage: %s --locale loc_name --platform platform\n", name);
+}
+
+void listKnownPlatforms() { // writes every name in platforms[] to err, one per line
+    const int32_t platformCount = (int32_t)(sizeof(platforms)/sizeof(platforms[0]));
+    u_fprintf(err, "Known platforms:\n");
+    for(int32_t i = 0; i < platformCount; i++) {
+        u_fprintf(err, "\t%s\n", platforms[i].name); // %s needs the name, not the whole struct
+    }
+}
+
+void addPlatform(const char *platform) { // records the platforms[] index of every entry named `platform`
+    const int32_t countBefore = gPlatformNo;
+    for(int32_t idx = 0; idx < sizeof(platforms)/sizeof(platforms[0]); idx++) {
+        if(strcmp(platform, platforms[idx].name) != 0) {
+            continue;
+        }
+        gPlatformIndexes[gPlatformNo++] = idx;
+    }
+    if(gPlatformNo != countBefore) {
+        return; // at least one entry matched
+    }
+    // nothing matched: complain and show what would have been accepted
+    u_fprintf(err, "Unknown platform %s\n", platform);
+    listKnownPlatforms();
+}
+
+// Parses the command line into the global settings: gVerbose, gDebug, gExemplar,
+// gQuiet, gRulesStdin, gLocale / gLocales, gPlatformIndexes and gWinLCID.
+//   argc, argv  the arguments as received by main()
+//   status      set to U_ILLEGAL_ARGUMENT_ERROR when --platform is missing
+// An unknown option or a help request prints the usage text and returns early.
+// With --rulesstdin only the "icu" platform is registered and no locale is read.
+// options[] is indexed with the constants of the enum declared next to it.
+void processArgs(int argc, char* argv[], UErrorCode &status)
+{
+    const int32_t maxLocales = (int32_t)(sizeof(gLocales)/sizeof(gLocales[0]));
+    int32_t i = 0;
+    U_MAIN_INIT_ARGS(argc, argv);
+
+    argc = u_parseArgs(argc, argv, (int32_t)(sizeof(options)/sizeof(options[0])), options);
+
+    if(argc < 0) {
+        // a negative result is the negated index of the argument that was rejected
+        u_fprintf(err, "Unknown option: %s\n", argv[-argc]);
+        usage(progName);
+        return;
+    }
+
+    if(options[HELP1].doesOccur || options[HELP2].doesOccur) {
+        usage(progName);
+        return;
+    }
+    // plain on/off switches
+    if(options[VERBOSE].doesOccur) {
+        gVerbose = TRUE;
+    }
+    if(options[DEBUG].doesOccur) {
+        // debugging implies verbose output
+        gDebug = TRUE;
+        gVerbose = TRUE;
+    }
+    if(options[EXEMPLAR].doesOccur) {
+        gExemplar = TRUE;
+    }
+    if(options[QUIET].doesOccur) {
+        gQuiet = TRUE;
+    }
+
+    // ASCII based options specified on the command line
+    // this is for testing purposes, will allow to load
+    // up ICU rules and then poke through them.
+    // In that case, we test only ICU and don't need
+    // a locale.
+    if(options[RULESSTDIN].doesOccur) {
+        gRulesStdin = TRUE;
+        addPlatform("icu");
+        return;
+    }
+
+    if(options[LOCALE].doesOccur) {
+        gLocale = options[LOCALE].value;
+    } else {
+        // Every remaining argument is a locale name. gLocales has a fixed
+        // capacity, so anything beyond it is reported and dropped instead of
+        // being written past the end of the array.
+        for(i = 1; i < argc; i++) {
+            if(gLocaleNo >= maxLocales) {
+                u_fprintf(err, "Too many locales, ignoring %s\n", argv[i]);
+                continue;
+            }
+            gLocales[gLocaleNo++] = argv[i];
+        }
+    }
+    if(options[PLATFORM].doesOccur) {
+        //strcpy(platform, options[PLATFORM].value);
+        //addPlatform("icu");
+        addPlatform(options[PLATFORM].value);
+    } else {
+        // The message used to index options[] with the leftover loop counter i,
+        // which names an unrelated option or reads past the end of options[].
+        u_fprintf(err, "Option %s is required!\n", options[PLATFORM].longName);
+        usage(progName);
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+
+    //
+    // Set up a Windows LCID
+    //
+    // gLocale is still NULL at this point when --locale was not given.
+    gWinLCID = uloc_getLCID(gLocale);
+
+}
+
+// Very rudimentary pretty-printer for collation rules: writes the first `len`
+// UChars of `name` to `file`, starting a new output line in front of every
+// '&' (U+0026) that has text in front of it.
+//   name  rule text
+//   len   number of UChars to print
+//   file  destination stream
+void printRules(const UChar *name, int32_t len, UFILE *file) {
+    const int32_t capacity = 16384;
+    UChar toPrint[capacity];
+    int32_t toPrintIndex = 0;
+    for(int32_t i = 0; i < len; i++) {
+        // Flush in front of a reset, and also when only the slot for the
+        // terminating NUL is left: a chunk longer than the buffer used to
+        // be written past its end.
+        const UBool atReset = (UBool)(name[i] == 0x0026 && toPrintIndex > 0);
+        if(atReset || toPrintIndex == capacity - 1) {
+            toPrint[toPrintIndex] = 0;
+            u_fprintf(file, "%U\n", toPrint);
+            toPrintIndex = 0;
+        }
+        toPrint[toPrintIndex++] = name[i];
+    }
+    if(toPrintIndex) { // whatever is still buffered after the last flush
+        toPrint[toPrintIndex] = 0;
+        u_fprintf(file, "%U\n", toPrint);
+    }
+}
+
+void escapeString(const UChar *name, int32_t len, UFILE *file) { // writes name to file unchanged; len is not used by the active code
+ u_fprintf(file, "%U", name); // NOTE(review): no length is passed, so name presumably has to be NUL-terminated
+/* Disabled alternative that really escaped: printable ASCII verbatim, everything else as a backslash-u escape.
+ int32_t j = 0;
+ for(j = 0; j < len; j++) {
+ if(name[j] >= 0x20 && name[j] < 0x80) {
+ u_fprintf(file, "%c", name[j]);
+ } else {
+ u_fprintf(file, "\\u%04X", name[j]);
+ }
+ }
+*/
+}
+void escapeALine(Line *line, UFILE *file) { // writes the line's own text (name/len) to file via escapeString()
+ escapeString(line->name, line->len, file);
+}
+
+void escapeExpansion(Line *line, UFILE *file) { // writes the line's expansion text (expansionString/expLen) to file via escapeString()
+ escapeString(line->expansionString, line->expLen, file);
+}
+
+void showNames(Line *line, UFILE *file) { // prints the character name of every UChar in line->name
+    char charName[256];
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t pos = 0;
+    while(pos < line->len) {
+        u_charName(line->name[pos++], U_EXTENDED_CHAR_NAME, charName, 256, &status);
+        u_fprintf(file, "%s ", charName);
+    }
+}
+
+void setArray(Line **array, Line *contents, int32_t size) { // points array[k] at contents[k] for every k below size
+    int32_t k = 0;
+    while(k < size) {
+        array[k] = &contents[k]; k++;
+    }
+}
+
+// Copies the value pointer of every entry in `table` into `array`, in the
+// table's iteration order, and returns the entry count taken before iterating.
+int32_t
+setArray(Line **array, Hashtable *table = &gElements) {
+    const int32_t total = table->count();
+    int32_t pos = -1; // iteration cursor handed to nextElement()
+    Line **slot = array;
+    for(const UHashElement *e = table->nextElement(pos); e != NULL; e = table->nextElement(pos)) {
+        *slot++ = (Line *)e->value.pointer;
+    }
+    return total;
+}
+
+UBool trySwamped(Line **smaller, Line **greater, UChar chars[2], CompareFn comparer) {
+    // Appends <separator><probe char> to copies of both lines (chars[0] to the
+    // smaller one, chars[1] to the greater one) in the global scratch lines and
+    // reports whether the decorated pair now compares as "greater".
+    int32_t tail = (*smaller)->len;
+    u_strcpy(gSource->name, (*smaller)->name);
+    gSource->name[tail++] = separatorChar;
+    gSource->name[tail++] = chars[0];
+    gSource->name[tail] = 0;
+    gSource->len = tail;
+
+    tail = (*greater)->len;
+    u_strcpy(gTarget->name, (*greater)->name);
+    gTarget->name[tail++] = separatorChar;
+    gTarget->name[tail++] = chars[1];
+    gTarget->name[tail] = 0;
+    gTarget->len = tail;
+    return (UBool)(comparer(&gSource, &gTarget) > 0);
+}
+
+UBool trySwamps(Line **smaller, Line **greater, UChar chars[2], CompareFn comparer) {
+    // Prefixes copies of both lines with <probe char><separator> (chars[0] for
+    // the smaller line, chars[1] for the greater one) in the global scratch
+    // lines and reports whether the decorated pair still compares as "less".
+    UChar *dst = gSource->name;
+    *dst++ = chars[0];
+    *dst++ = separatorChar;
+    u_strcpy(dst, (*smaller)->name);
+    gSource->len = 2 + (*smaller)->len;
+
+    dst = gTarget->name;
+    *dst++ = chars[1];
+    *dst++ = separatorChar;
+    u_strcpy(dst, (*greater)->name);
+    gTarget->len = 2 + (*greater)->len;
+    return (UBool)(comparer(&gSource, &gTarget) < 0);
+}
+
+UColAttributeValue // strength of the difference between *prevLine and *currLine as seen through comparer
+probeStrength(Line** prevLine, Line **currLine, CompareFn comparer) { // returns UCOL_IDENTICAL, UCOL_OFF (wrong order), UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY or UCOL_ON (undetermined)
+ // Probe pairs for "primary swamps secondary":
+ // each row holds two characters where [0] 2> [1] (secondary difference)
+ UChar primSwamps[][2] = {
+ { 0x00E0, 0x0061 },
+ { 0x0450, 0x0435 },
+ { 0x31a3, 0x310d }
+ };
+ // Probe pairs for "secondary swamps tertiary":
+ // each row holds two characters where [0] 3> [1] (tertiary difference)
+ UChar secSwamps[][2] = {
+ { 0x0053, 0x0073 },
+ { 0x0415, 0x0435 },
+ { 0x31b6, 0x310e }
+ };
+ // Probe pairs for "secondary is swamped by primary", [0] 1> [1];
+ // this table is not referenced by the code below
+ UChar secSwamped[][2] = {
+ { 0x0062, 0x0061 },
+ { 0x0436, 0x0454 },
+ { 0x310e, 0x310d }
+ };
+ // Probe pairs for "tertiary is swamped by secondary":
+ // each row holds two characters where [0] 2> [1] (same values as primSwamps)
+ UChar terSwamped[][2] = {
+ { 0x00E0, 0x0061 },
+ { 0x0450, 0x0435 },
+ { 0x31a3, 0x310d }
+ };
+ int32_t i = 0; // row of the probe tables to use
+ // result of the direct comparison of the two lines
+ int result = 0;
+ // Choose the pair: always row 0, the per-script selection below is disabled
+ i = 0;
+ /*
+ if((*prevLine)->name[0] > 0xFF && (*currLine)->name[0] > 0xFF) {
+ i = 0;
+ } else if((*prevLine)->name[0] < 0x0400 && (*currLine)->name[0] < 0x0400) {
+ i = 1;
+ } else {
+ i = 2;
+ }
+ */
+ // The tests run strictly in this order; the first one that succeeds decides.
+ if((result = comparer(prevLine, currLine)) == 0) {
+ return UCOL_IDENTICAL;
+ } else if(result > 0) {
+ // prev sorts after curr: the caller's ordering assumption does not hold
+ return UCOL_OFF;
+ } else if(trySwamps(prevLine, currLine, primSwamps[i], comparer)) { // order survives a secondary-different prefix
+ return UCOL_PRIMARY;
+ } else if(trySwamps(prevLine, currLine, secSwamps[i], comparer)) { // order survives a tertiary-different prefix
+ return UCOL_SECONDARY;
+ } else if(trySwamped(prevLine, currLine, terSwamped[i], comparer)) {
+ // order is overturned by a secondary-different suffix: tertiary difference
+ return UCOL_TERTIARY;
+ } else {
+ // none of the probes matched: strength unknown
+ return UCOL_ON;
+ }
+}
+
+// Walks the already sorted `lines` pairwise. Every line after the first gets
+// the strength of its difference from its predecessor (via probeStrength),
+// its position in the array, and previous/next links to its neighbour.
+void
+analyzeStrength(Line **lines, int32_t size, CompareFn comparer) {
+    Line **prevSlot = lines;
+
+    for(int32_t pos = 1; pos < size; pos++, prevSlot++) {
+        Line **currSlot = prevSlot + 1;
+        Line *curr = *currSlot;
+        curr->strength = probeStrength(prevSlot, currSlot, comparer);
+        curr->sortedIndex = pos;
+        // chain the pair in both directions
+        curr->previous = *prevSlot;
+        (*prevSlot)->next = curr;
+    }
+}
+
+// Writes the relation symbol for `strength` to `file` with a space on each side: "=", "<<<",
+// "<<", "<" for identical..primary, ">?" for UCOL_OFF (out of order), "?!" for anything else.
+void printStrength(UColAttributeValue strength, UFILE *file) {
+    u_fprintf(file, " ");
+    switch(strength) {
+    case UCOL_IDENTICAL:
+        u_fprintf(file, "=");
+        break;
+    case UCOL_TERTIARY:
+        u_fprintf(file, "<<<");
+        break;
+    case UCOL_SECONDARY:
+        u_fprintf(file, "<<");
+        break;
+    case UCOL_PRIMARY:
+        u_fprintf(file, "<");
+        break;
+    case UCOL_OFF:
+        u_fprintf(file, ">?");
+        break; // was missing: UCOL_OFF fell through and printed ">??!"
+    default:
+        u_fprintf(file, "?!");
+        break;
+    }
+    u_fprintf(file, " ");
+}
+
+void printStrength(Line *line, UFILE *file) { // convenience overload: prints the symbol for line->strength
+ printStrength(line->strength, file);
+}
+
+void printLine(Line *line, UFILE *file) {
+    // the line's text, followed by "/<expansion text>" when it is an expansion
+    escapeALine(line, file);
+    if(!line->isExpansion) { return; }
+    u_fprintf(file, "/");
+    escapeExpansion(line, file);
+}
+
+// Prints the ordering as rule text on `file`. The first element is printed
+// bare; every later one is preceded by its strength symbol. A reset line
+// starts a new "& " line and removed lines are skipped. With useLinks the
+// walk follows the next pointers starting at *lines, otherwise it reads the
+// array itself; either way it ends after `size` elements or as soon as the
+// line handled last has no successor.
+void printOrdering(Line **lines, int32_t size, UFILE *file, UBool useLinks = FALSE) {
+    Line *previous = *lines;
+
+    // head of the list
+    if(previous->isReset) {
+        u_fprintf(file, "\n& ");
+        escapeALine(previous, file);
+    } else if(!previous->isRemoved) {
+        printLine(previous, file);
+    }
+
+    // everything after the head
+    for(int32_t i = 1; i < size && previous->next; i++) {
+        Line *line = useLinks ? previous->next : lines[i];
+        previous = line;
+        if(line->isReset) {
+            u_fprintf(file, "\n& ");
+            escapeALine(line, file);
+            continue;
+        }
+        if(line->isRemoved) {
+            continue;
+        }
+        if(file == out) {
+            u_fprintf(file, "\n");
+        }
+        // the index starts at 1 here, so the strength is always printed
+        printStrength(line, file);
+        printLine(line, file);
+    }
+
+    u_fprintf(file, "\n");
+}
+
+
+void setIndexes(Line **lines, int32_t size) { // numbers the lines 0..size-1 and chains each one to its neighbour
+    Line *prev = *lines;
+    prev->sortedIndex = 0;
+    for(int32_t pos = 1; pos < size; pos++) {
+        Line *line = lines[pos];
+        line->previous = prev;
+        prev->next = line;
+        line->sortedIndex = pos;
+        prev = line;
+    }
+}
+
+
+// Original author's note: this seems to be a dead end. Flags `line` as an expansion and tries to splice its entry into the next/previous chain.
+void
+noteExpansion(Line **gLines, Line *line, int32_t size, CompareFn comparer) { // gLines/size: array of line pointers that is scanned for the insertion point
+ UErrorCode status = U_ZERO_ERROR;
+
+ UnicodeString key(line->name, line->len);
+ // the entry is looked up in gExpansions (an earlier version used gElements)
+ Line *toInsert = (Line *)gExpansions.get(key);
+ if(toInsert != NULL) {
+ toInsert->isExpansion = TRUE;
+ u_strcpy(toInsert->expansionString, line->expansionString);
+ toInsert->expLen = line->expLen;
+ toInsert->previous->next = toInsert->next; // unlink; NOTE(review): previous and next are not checked for NULL
+ toInsert->next->previous = toInsert->previous;
+ gElements.remove(key);
+ } else {
+ toInsert = new Line(*line); // unknown so far: store a flagged copy in gElements
+ toInsert->isExpansion = TRUE;
+ gElements.put(UnicodeString(toInsert->name, toInsert->len), toInsert, status);
+ }
+
+ int32_t i = 0;
+ Line testLine; // scratch: name of gLines[i] followed by the expansion string
+ Line *l = &testLine;
+ for(i = 0; i < size; i++) {
+ u_strcpy(testLine.name, (*(gLines+i))->name);
+ u_strcat(testLine.name, line->expansionString);
+ testLine.len = (*(gLines+i))->len + line->expLen;
+ if(comparer(&l, &line) > 0) { // first element whose expanded form sorts after `line`
+ toInsert->previous = *(gLines+i-1); // NOTE(review): for i == 0 this reads gLines[-1]
+ toInsert->next = *(gLines+i);
+ toInsert->previous->next = toInsert;
+ toInsert->next->previous = toInsert;
+ break;
+ }
+ }
+ if(gVerbose) {
+ u_fprintf(log, "Adding expansion\n");
+ escapeALine(line, log);
+ u_fprintf(log, "/");
+ escapeExpansion(line, log);
+ u_fprintf(log, " ");
+ }
+}
+
+void // moves every entry of gExpansions to the place in the next/previous chain where it sorts, and re-probes the affected strengths
+positionExpansions(Line **gLines, int32_t size, CompareFn comparer) { // the chain is walked from *gLines; size is only used by the disabled traces
+ int result = 0; // not used below
+ Line *line = NULL; // current expansion, taken from gExpansions
+ Line *toMove = NULL; // entry with the same text, taken from gElements
+ int32_t i = 0, j = 0; // this i is shadowed by the one declared inside the loop
+ Line **sortedExpansions = new Line*[gExpansions.count()];
+ int32_t sortedExpansionsSize = setArray(sortedExpansions, &gExpansions);
+ qsort(sortedExpansions, sortedExpansionsSize, sizeof(Line *), comparer);
+ // Make a list of things in the vicinity of the expansion candidate
+ for(j = 0; j < sortedExpansionsSize; j++) {
+ line = *(sortedExpansions+j);
+ UnicodeString key(line->name, line->len);
+ toMove = (Line *)gElements.get(key); // NOTE(review): used below without a NULL check
+ int32_t i = 0;
+ Line testLine, prevTestLine; // scratch lines: the element under test and the one tested before it
+ Line *l = &testLine;
+ Line *prevL = &prevTestLine;
+ // This can be further optimized, since we now know that we have a
+ // sorted list of expansions, so current can start from toMove, since all
+ // the elements before it are already smaller. In the beginning it needs to
+ // be on gLines, though.
+ Line *current = *gLines;
+ while(current) {
+ if(current == toMove) {
+ // we are wading through a sorted list
+ // if we found ourselves, it means that we
+ // are already in a right place, so no moving
+ // is needed, but we need to make sure we have
+ // the right strength.
+ toMove->strength = probeStrength(&prevL, &toMove, comparer);
+ if(0) { // disabled trace
+ u_fprintf(log, "Positioned expansion without moving ");
+ printLine(toMove, log);
+ u_fprintf(log, " new ordering: \n");
+ printOrdering(gLines, size, log, TRUE);
+ }
+ break;
+ } else {
+ u_strcpy(testLine.name, current->name); // build the text that `current` is compared under
+ if(!current->isExpansion) {
+ u_strcat(testLine.name, line->expansionString); // plain element: append the expansion string
+ testLine.len = current->len + line->expLen;
+ } else {
+ testLine.len = current->len;
+ }
+ if(comparer(&l, &line) > 0) { // first chain element that sorts after the expansion
+ // remove toMove from the chain, re-probing the strength of its old successor
+ if(toMove->next) {
+ toMove->next->strength = probeStrength(&(toMove->previous), &(toMove->next), comparer);
+ toMove->next->previous = toMove->previous;
+ }
+ if(toMove->previous) {
+ toMove->previous->next = toMove->next;
+ }
+
+ // insert toMove directly in front of current
+ toMove->previous = current->previous;
+ toMove->next = current;
+
+ if(current->previous) {
+ current->previous->next = toMove;
+ }
+ current->previous = toMove;
+
+ toMove->strength = probeStrength(&prevL, &toMove, comparer); // against the previously tested text
+ toMove->next->strength = probeStrength(&toMove, &l, comparer); // successor, compared under its test text
+ if(0) { // disabled trace
+ u_fprintf(log, "Positioned expansion ");
+ printLine(toMove, log);
+ u_fprintf(log, " new ordering: \n");
+ printOrdering(gLines, size, log, TRUE);
+ }
+ if(toMove->strength == UCOL_IDENTICAL) {
+ // check for craziness such as s = ss/s
+ // such a line would consist of previous (or next) concatenated with the expansion value
+ // make a test; NOTE(review): fullString holds 256 UChars and the copies are unbounded
+ UChar fullString[256];
+ u_strcpy(fullString, toMove->previous->name);
+ u_strcat(fullString, toMove->expansionString);
+ if(u_strcmp(fullString, toMove->name) == 0) {
+ toMove->previous->next = toMove->next;
+ toMove->next->previous = toMove->previous;
+ toMove->isRemoved = TRUE;
+ u_fprintf(log, "Removed: ");
+ printLine(toMove, log);
+ u_fprintf(log, "\n");
+ }
+ } else if(toMove->next->strength == UCOL_IDENTICAL) { // same test against the successor
+ UChar fullString[256];
+ u_strcpy(fullString, toMove->next->name);
+ u_strcat(fullString, toMove->expansionString);
+ if(u_strcmp(fullString, toMove->name) == 0) {
+ toMove->next->strength = toMove->strength;
+ toMove->previous->next = toMove->next;
+ toMove->next->previous = toMove->previous;
+ toMove->isRemoved = TRUE;
+ u_fprintf(log, "Removed because of back: ");
+ printLine(toMove, log);
+ u_fprintf(log, "\n");
+ }
+ }
+ break;
+ }
+ prevTestLine = testLine; // remember the text that was just tested
+ }
+ current = current->next;
+ }
+ }
+ delete[] sortedExpansions;
+}
+
+
+void // marks `line` as an expansion in gElements (creating the entry if needed) and in gExpansions
+noteExpansion(Line *line) {
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString key(line->name, line->len);
+    Line *el = (Line *)gElements.get(key);
+    if(el != NULL) {
+        el->isExpansion = TRUE;
+        u_strcpy(el->expansionString, line->expansionString);
+        el->expLen = line->expLen;
+    } else {
+        Line *toInsert = new Line(*line);
+        toInsert->isExpansion = TRUE;
+        gElements.put(UnicodeString(line->name, line->len), toInsert, status);
+    }
+    Line *el2 = (Line *)gExpansions.get(key);
+    if(el2 != NULL) { // an unknown key yields NULL; the result used to be dereferenced unchecked
+        el2->isExpansion = TRUE;
+        u_strcpy(el2->expansionString, line->expansionString);
+        el2->expLen = line->expLen;
+    }
+    if(gDebug) {
+        u_fprintf(log, "Adding expansion\n");
+        printLine(line, log);
+        u_fprintf(log, "\n");
+    }
+}
+
+void // stores a copy of `line`, flagged as a contraction, in gElements under the line's text
+noteContraction(Line *line) {
+    Line *copy = new Line(*line);
+    copy->isContraction = TRUE;
+    const UnicodeString key(line->name, line->len);
+    UErrorCode status = U_ZERO_ERROR; // the outcome of put() is not examined
+    gElements.put(key, copy, status);
+    if(!gVerbose) { return; }
+    u_fprintf(log, "Adding contraction\n");
+    escapeALine(line, log);
+    u_fprintf(log, " ");
+}
+
+void
+noteElement(Line *line) {
+ UErrorCode status = U_ZERO_ERROR;
+ Line *toInsert = new Line(*line);
+ gElements.put(UnicodeString(line->name, line->len), toInsert, status);
+ if(0) { //if(gDebug)
+ escapeALine(line, log);
+ u_fprintf(log, " ");
+ }
+}
+
+
+
+// Compares the order of `lines` (under the names they currently carry) with the strengths
+// stored on each line, and calls noteContraction() for elements that appear to have moved.
+// Such elements are most probably contractions, but they may be fake: if xy is a suspected
+// contraction and x/y later turns out to be an expansion, then xy is a fake contraction.
+// Returns the number of adjacent pairs whose probed strength disagreed with the stored one.
+int32_t
+analyzeContractions(Line** lines, int32_t size, CompareFn comparer) {
+ int32_t i = 0, j = 0;
+ int32_t outOfOrder = 0; // number of disagreeing pairs found so far
+ UColAttributeValue strength = UCOL_OFF;
+ UColAttributeValue currStrength = UCOL_OFF; // not used below
+ Line **prevLine = lines;
+ Line **currLine = NULL;
+ Line **backupLine = NULL; // assigned inside the probes below, never read
+ UBool prevIsContraction = FALSE, currIsContraction = FALSE;
+ // Problem here is detecting a contraction that is at the very end of the sorted list
+ for(i = 1; i < size; i++) {
+ currLine = lines+i;
+ strength = probeStrength(prevLine, currLine, comparer);
+ if(strength == UCOL_OFF || strength != (*currLine)->strength) { // out of order, or a different strength than recorded
+ prevIsContraction = FALSE;
+ currIsContraction = FALSE;
+ if(!outOfOrder) { // first finding of this call: start the log line
+ if(gVerbose) {
+ u_fprintf(log, "Possible contractions: ");
+ }
+ }
+ // now we have two elements that are different. The question is,
+ // which one of them is the contraction - which one has moved.
+ // Could be the previous, but could also be the current.
+
+ outOfOrder++;
+
+ // First, let's check whether the previous has jumped back
+ j = i+1;
+ // skip all the nexts that have smaller strength, they don't have an effect
+ while(j < size && (*(lines+j))->strength > (*currLine)->strength) {
+ j++;
+ }
+ // check if there are other elements of same or greater strength
+ while(j < size &&
+ (strength = probeStrength(prevLine, (backupLine = lines+j), comparer)) == UCOL_OFF) {
+ j++;
+ // previous is out of order against this later element as well, so it might in fact be a contraction
+ prevIsContraction = TRUE;
+ }
+ if(prevIsContraction) {
+ noteContraction(*prevLine);
+ j = i-2;
+ // add all the previous elements with smaller strength, since they also
+ // will jump over and are contractions
+ while(j >= 0 && (*(lines+j+1))->strength > (*currLine)->strength) {
+ strength = probeStrength(lines+j, currLine, comparer);
+ if(strength == UCOL_OFF) {
+ noteContraction(*(lines+j));
+ }
+ j--;
+ }
+ }
+
+ // now we check if the current element is jumping forward,
+ // the dance steps are analogous to above.
+ j = i - 2;
+ while(j >= 0 && (*(lines+j+1))->strength > (*currLine)->strength) {
+ j--;
+ }
+ while(j >= 0 &&
+ (strength = probeStrength((backupLine = lines+j), currLine, comparer)) == UCOL_OFF) {
+ j--;
+ currIsContraction = TRUE;
+ }
+ if(currIsContraction) { // NOTE(review): the current line itself is only logged here, not passed to noteContraction()
+ if(gVerbose) {
+ escapeALine(*currLine, log);
+ u_fprintf(log, " ");
+ }
+ j = i+1;
+ while(j < size && (*(lines+j))->strength > (*currLine)->strength) {
+ strength = probeStrength(prevLine, lines+j, comparer);
+ if(strength == UCOL_OFF) {
+ noteContraction(*(lines+j));
+ }
+ j++;
+ }
+ }
+
+ // Not sure about either. List both; they are checked later
+ if(!(prevIsContraction || currIsContraction)) {
+ noteContraction(*prevLine);
+ noteContraction(*currLine);
+ }
+ }
+ prevLine = currLine;
+ }
+ if(outOfOrder) { // terminate the log line started above
+ if(gVerbose) {
+ u_fprintf(log, "\n");
+ }
+ }
+ return outOfOrder;
+}
+
+// Looks for contractions by "doubling": for every element, each entry of
+// `lines` is temporarily renamed to <name of that element><own name> and
+// gLines (an array of Line pointers, presumably pointing into `lines`) is
+// re-checked with analyzeContractions(). Returns the accumulated result of
+// those checks and prints one "." per pass to log unless gQuiet is set.
+int32_t
+detectContractions(Line **gLines, Line *lines, int32_t size, CompareFn comparer) {
+    int32_t found = 0;
+    Line **debugOrder = NULL; // allocated lazily, only needed for debug output
+    // pristine copy of the lines; every pass rebuilds `lines` from it
+    Line *originals = new Line[size];
+    Line::copyArray(originals, lines, size);
+
+    for(int32_t prefix = 0; prefix < size; prefix++) {
+        // preserve index and previous
+        Line::copyArray(lines, originals, size);
+        for(int32_t k = 0; k < size; k++) {
+            Line &doubled = lines[k];
+            u_strcpy(doubled.name, originals[prefix].name);
+            u_strcat(doubled.name, originals[k].name);
+            doubled.len = originals[prefix].len + originals[k].len;
+        }
+
+        found += analyzeContractions(gLines, size, comparer);
+        if(found != 0 && gDebug) { // the running total is tested, not just this pass
+            if(debugOrder == NULL) {
+                debugOrder = new Line*[size];
+            }
+            // Show the sorted doubles, for debugging
+            setArray(debugOrder, lines, size);
+            qsort(debugOrder, size, sizeof(Line *), comparer);
+            analyzeStrength(debugOrder, size, comparer);
+            printOrdering(debugOrder, size, log);
+        }
+        if(!gQuiet) {
+            u_fprintf(log, ".");
+        }
+    }
+    if(!gQuiet) {
+        u_fprintf(log, "\n");
+    }
+    delete[] originals;
+    delete[] debugOrder; // delete[] on NULL is a no-op
+    return found;
+}
+
+// gLines in this function is an array of sorted pointers.
+// Contractions are already included.
+//
+// Works in two phases:
+//  1. For every prefix element i and every single element k, probe where k sorts
+//     relative to the doubles <i><j>. Elements that land strictly inside the
+//     range of doubles are recorded as expansion candidates (addExpansionHit,
+//     isExpansion, expIndex) and cached in the global gExpansions table.
+//  2. For every cached candidate, rebuild the doubles around each recorded hit,
+//     sort them together with the candidate and pick the expansion string from
+//     the candidate's neighbours in that sorted order.
+// Returns gExpansions.count().
+int32_t
+detectExpansions(Line **gLines, int32_t size, CompareFn comparer) {
+ UErrorCode status = U_ZERO_ERROR;
+ // detect expansions
+
+ // startStrength and endStrength are only assigned below, never read afterwards.
+ UColAttributeValue startStrength = UCOL_OFF, endStrength = UCOL_OFF,
+ strength = UCOL_OFF, previousStrength = UCOL_OFF;
+ // Scratch lines: start = <i><first element>, end = <i><last element>, src = <i><j>.
+ Line start, end, src;
+ Line *startP = &start, *endP = &end, *srcP = &src;
+ Line *current = NULL;
+ memset(startP, 0, sizeof(Line));
+ memset(endP, 0, sizeof(Line));
+ memset(srcP, 0, sizeof(Line));
+ int32_t srcLen;
+ int32_t i = 0, j = 0, k = 0;
+ for(i = 0; i < size; i++) {
+ // Build the smallest and the largest double that start with element i.
+ u_strcpy(start.name, (*(gLines+i))->name);
+ u_strcpy(end.name, (*(gLines+i))->name);
+ srcLen = (*(gLines+i))->len;
+ u_strcpy(start.name+srcLen, (*(gLines))->name);
+ start.len = srcLen + (*(gLines))->len;
+ u_strcpy(end.name+srcLen, (*(gLines+size-1))->name);
+ end.len = srcLen + (*(gLines+size-1))->len;
+
+ for(k = 0; k < size; k++) { // k is index of a thing that is not doubled
+ current = *(gLines+k);
+ // see if we have moved to front
+ // has it moved to the very beginning
+ if((startStrength = probeStrength((gLines+k), &startP, comparer)) != UCOL_OFF) {
+ continue; // this one is in the front
+ }
+ // has it moved to the very end?
+ if((endStrength = probeStrength(&endP, (gLines+k), comparer)) != UCOL_OFF) {
+ continue; // this one is in the back
+ }
+ // Potential Expansion
+ if(gDebug) { //gVerbose
+ u_fprintf(log, "Possible expansion: ");
+ escapeALine(*(gLines+k), log);
+ u_fprintf(log, " ");
+ }
+ // Now we have to make sure that this is really an expansion
+ // First, we have to find it
+ u_strcpy(src.name, (*(gLines+i))->name);
+ for(j = 0; j < size; j++) {
+ // src = <i><j>; stop at the first j for which probing (src, k) yields UCOL_OFF
+ u_strcpy(src.name+srcLen, (*(gLines+j))->name);
+ src.len = srcLen + (*(gLines+j))->len;
+ if((strength = probeStrength(&srcP, (gLines+k), comparer)) == UCOL_OFF) {
+ strength = probeStrength((gLines+k), &srcP, comparer);
+ // we found it *(gLines+j-1) is the element that is interesting
+ // since gLines+j-1 < gLines+k < gLines+j
+ // NOTE(review): nothing visible here guarantees j > 0, so the debug
+ // output below may read *(gLines-1) - confirm.
+ if(gDebug) { //gVerbose
+ u_fprintf(log, "i = %i, k = %i, j = %i ", i, k, j);
+ escapeALine(*(gLines+i), log);
+ escapeALine(*(gLines+j-1), log);
+ printStrength(previousStrength, log);
+ escapeALine(current, log);
+ printStrength(strength, log);
+ escapeALine(*(gLines+i), log);
+ escapeALine(*(gLines+j), log);
+ u_fprintf(log, "\n");
+ }
+ // check whether it is a contraction that is the same as an expansion
+ // or a multi character that doesn't do anything
+ current->addExpansionHit(i, j);
+ current->isExpansion = TRUE;
+ current->expIndex = k;
+ // cache expansion
+ gExpansions.put(UnicodeString(current->name, current->len), current, status); //new Line(*current)
+ break;
+ }
+ // previousStrength is not reset per k; it carries over from earlier probes
+ previousStrength = strength;
+ }
+ }
+ // progress indicator: one dot per prefix element
+ if(!gQuiet) {
+ u_fprintf(log, ".");
+ }
+ }
+ if(!gQuiet) {
+ u_fprintf(log, "\n");
+ }
+ // now we have identified possible expansions. We need to find out how do they expand.
+ // Let's iterate over expansions cache - it's easier.
+ const UHashElement *el = NULL;
+ int32_t hashIndex = -1;
+ // NOTE(review): each candidate writes 2*expansionPrefixesSize+1 entries into
+ // doubles/sorter; the size*10 capacity is not checked anywhere in this function.
+ Line *doubles = new Line[size*10];
+ Line **sorter = new Line*[size*10];
+ int32_t currSize = 0; // computed per candidate below but never read
+ int32_t newSize = 0;
+ Line *prev = NULL;
+ Line *next = NULL;
+ Line *origin = NULL;
+ int result = 0;
+ // Make a list of things in the vicinity of expansion candidate
+ // in expansionPrefixes and expansionAfter we have stored the
+ // prefixes of stuff that caused the detection of an expansion
+ // and a position where the expansion was.
+ // For example (icu, de__PHONEBOOK), we had:
+ // aE <<< \u00E4 < af
+ // AD < \u00E4 <<< Ae
+ // From that we will construct the following sequence:
+ // AD < aE <<< \u00E4/ <<< Ae < af
+ // then we will take the vicinity of \u00E4:
+ // aE <<< \u00E4/ <<< Ae
+ // then we will choose the smallest expansion to be the expansion
+ // part: 'e'.
+ // if there is equality, we choose the equal part:
+ // (win32, de__PHONEBOOK):
+ // AD < \u00E4/ = ae <<< aE <<< Ae
+ // we choose 'e'.
+
+ while((el = gExpansions.nextElement(hashIndex)) != NULL) {
+ newSize = 0;
+ current = (Line *)el->value.pointer;
+ currSize = size*current->expansionPrefixesSize;
+ if(gDebug) {
+ escapeALine(current, log);
+ u_fprintf(log, " Number: %i\n", current->expansionPrefixesSize);
+ }
+ // construct the doubles
+ // For every recorded hit, add <prefix><after-1> and <prefix><after>; `suffix`
+ // remembers which gLines entry supplied the second half.
+ // NOTE(review): expansionAfter[i]-1 is used as an index; confirm it cannot be -1.
+ for(i = 0; i < current->expansionPrefixesSize; i++) {
+ doubles[newSize].suffix = current->expansionAfter[i]-1;
+ doubles[newSize++].setToConcat(*(gLines+current->expansionPrefixes[i]), *(gLines+current->expansionAfter[i]-1));
+ doubles[newSize].suffix = current->expansionAfter[i];
+ doubles[newSize++].setToConcat(*(gLines+current->expansionPrefixes[i]), *(gLines+current->expansionAfter[i]));
+ }
+ // add the expansion we're observing
+ doubles[newSize++] = *current;
+ setArray(sorter, doubles, newSize);
+ qsort(sorter, newSize, sizeof(Line*), comparer);
+ analyzeStrength(sorter, newSize, comparer);
+ if(gDebug) {
+ printOrdering(sorter, newSize, log);
+ }
+ // Locate the candidate itself inside the sorted doubles.
+ // NOTE(review): the search has no upper bound on i; it relies on the copy of
+ // *current added above comparing equal to *current.
+ i = 0;
+ while(**(sorter+i) != *current) {
+ i++;
+ }
+ // find the two additions
+ if((*(sorter+i))->strength == UCOL_IDENTICAL) {
+ // the candidate is marked identical: take the suffix of the entry before it
+ // NOTE(review): reads sorter[i-1] without checking i > 0 - confirm.
+ origin = *(gLines+((*(sorter+i-1))->suffix));
+ u_strcpy(current->expansionString, origin->name);
+ current->expLen = origin->len;
+ } else if(i < newSize-1 && (*(sorter+i+1))->strength == UCOL_IDENTICAL) {
+ // the entry after the candidate is marked identical: take that entry's suffix
+ origin = *(gLines+((*(sorter+i+1))->suffix));
+ u_strcpy(current->expansionString, origin->name);
+ current->expLen = origin->len;
+ } else {
+ // No identical neighbour: compare the suffixes of both neighbours and keep the
+ // one that sorts first. Nothing is stored when either neighbour is missing.
+ if(i > 0) {
+ prev = *(gLines+(*(sorter+i-1))->suffix);
+ if(i < newSize-1) {
+ next = *(gLines+(*(sorter+i+1))->suffix);
+ result = comparer(&prev, &next);
+ if(result <= 0) {
+ u_strcpy(current->expansionString, prev->name);
+ current->expLen = prev->len;
+ } else {
+ u_strcpy(current->expansionString, next->name);
+ current->expLen = next->len;
+ }
+ }
+ }
+ // debug output, currently disabled
+ if(0) { //if(gDebug)
+ u_fprintf(log, "Expansion is: ");
+ escapeALine(current, log);
+ u_fprintf(log, "/");
+ escapeExpansion(current, log);
+ u_fprintf(log, "\n");
+ }
+ }
+ noteExpansion(current);
+ //noteExpansion(gLines, current, size, comparer);
+ if(!gQuiet) {
+ u_fprintf(log, ".");
+ }
+ }
+ if(!gQuiet) {
+ u_fprintf(log, "\n");
+ }
+ delete[] doubles;
+ delete[] sorter;
+ return gExpansions.count();
+}
+
+UBool
+isTailored(Line *line, UErrorCode &status) {
+    // Walks the collation elements of line->name in parallel through the tested
+    // collator (gCol) and the reference collator (gUCA), skipping zero elements
+    // on both sides. Returns TRUE as soon as the two sequences differ, FALSE if
+    // both run out while still equal.
+    UCollationElements *tailoring = ucol_openElements(gCol, line->name, line->len, &status);
+    UCollationElements *uca = ucol_openElements(gUCA, line->name, line->len, &status);
+
+    UBool differs = FALSE;
+    for(;;) {
+        int32_t tailoredCE = UCOL_NULLORDER;
+        int32_t rootCE = UCOL_NULLORDER;
+
+        // next non-zero element from each iterator
+        do {
+            tailoredCE = ucol_next(tailoring, &status);
+        } while(tailoredCE == 0);
+        do {
+            rootCE = ucol_next(uca, &status);
+        } while(rootCE == 0);
+
+        if(tailoredCE != rootCE) {
+            differs = TRUE;
+            break;
+        }
+        // both values are equal here, so either one reaching the end ends the walk
+        if(tailoredCE == UCOL_NULLORDER || rootCE == UCOL_NULLORDER) {
+            break;
+        }
+    }
+
+    ucol_closeElements(tailoring);
+    ucol_closeElements(uca);
+    return differs;
+}
+
+// Walks the linked list of lines starting at the first gLines entry and marks
+// every line that isTailored() reports as untailored with isRemoved. When a
+// tailored line follows removed lines, one of those removed predecessors is
+// flagged with isReset.
+// The `size` parameter is not used in this function.
+void
+reduceUntailored(Line **gLines, int32_t size){
+ UErrorCode status = U_ZERO_ERROR;
+ Line *current = *(gLines);
+ Line *previous = NULL;
+ while(current) {
+ // if the current line is not tailored according to the UCA
+ if(!isTailored(current, status)) {
+ // we remove it
+ current->isRemoved = TRUE;
+ } else {
+ // if it's tailored
+ if(current->previous && current->previous->isRemoved == TRUE) {
+ previous = current->previous;
+ // Step back over removed lines for as long as the line under the cursor has a
+ // numerically greater strength value than the current line, or is an
+ // expansion or contraction, and the line before it is removed as well.
+ while(previous && (previous->strength > current->strength || previous->isExpansion || previous->isContraction) && previous->isRemoved) {
+ if(previous->previous && previous->previous->isRemoved) {
+ previous = previous->previous;
+ } else {
+ break;
+ }
+ }
+ // NOTE(review): `previous` is only ever assigned non-NULL pointers above, so the
+ // else branch looks unreachable - confirm.
+ if(previous) {
+ previous->isReset = TRUE;
+ } else {
+ (*(gLines))->isReset = TRUE;
+ }
+ }
+ }
+ current = current->next;
+ }
+}
+
+void
+constructAndAnalyze(Line **gLines, Line *lines, int32_t size, CompareFn comparer) {
+    // Drives the whole analysis for one comparer: sort the single elements,
+    // detect contractions, re-sort, detect and position expansions, then print
+    // the resulting ordering to both `out` and `log`.
+
+    // Point the pointer array at the block of lines and sort it.
+    setArray(gLines, lines, size);
+    qsort(gLines, size, sizeof(Line *), comparer);
+    analyzeStrength(gLines, size, comparer);
+    if(gVerbose) {
+        u_fprintf(log, "Ordering:\n");
+        printOrdering(gLines, size, log);
+    }
+
+    // Contractions
+    if(!gQuiet) {
+        u_fprintf(log, "Detecting contractions?\n");
+    }
+    const int32_t contractionCount = detectContractions(gLines, lines, size, comparer);
+    if(!gQuiet) {
+        u_fprintf(log, "Detected %i contractions\n", contractionCount);
+    }
+
+    // Suspected contractions are now in the table, so the pointer array is
+    // refilled (the one-argument setArray returns the new size) and re-sorted.
+    size = setArray(gLines);
+    qsort(gLines, size, sizeof(Line *), comparer);
+    analyzeStrength(gLines, size, comparer);
+
+    // Expansions
+    if(!gQuiet) {
+        u_fprintf(log, "Detecting expansions\n");
+    }
+    const int32_t expansionCount = detectExpansions(gLines, size, comparer);
+    if(!gQuiet) {
+        u_fprintf(log, "Detected %i expansions\n", expansionCount);
+    }
+
+    positionExpansions(gLines, size, comparer);
+    if(gVerbose) {
+        u_fprintf(log, "After positioning expansions:\n");
+        printOrdering(gLines, size, log, TRUE);
+    }
+
+    //reduceUntailored(gLines, size);
+
+    // Final report
+    if(!gQuiet) {
+        u_fprintf(out, "Final result\n");
+    }
+    printOrdering(gLines, size, out, TRUE);
+    printOrdering(gLines, size, log, TRUE);
+}
+
+// Check whether upper case comes before lower case or vice-versa.
+// Four probes made of 'a'/'A' pairs joined by separatorChar are sorted with
+// gComparer and the sorted order is compared with the order they are listed in.
+// Returns 0 if the order is unchanged, 1 if it is exactly reversed, -1 otherwise.
+int32_t
+checkCaseOrdering(void) {
+    UChar probes[][3] = {
+        { 0x0061, separatorChar, 0x0061 }, // a|a
+        { 0x0061, separatorChar, 0x0041 }, // a|A
+        { 0x0041, separatorChar, 0x0061 }, // A|a
+        { 0x0041, separatorChar, 0x0041 }, // A|A
+    };
+    const int32_t size = sizeof(probes)/sizeof(probes[0]);
+
+    Line lines[size];
+    Line *sortedLines[size];
+    int32_t idx = 0;
+
+    for(idx = 0; idx < size; idx++) {
+        lines[idx].setName(probes[idx], 3);
+    }
+    setArray(sortedLines, lines, size);
+    qsort(sortedLines, size, sizeof(Line*), gComparer);
+
+    // Count positions that match the listed order and the mirrored order.
+    int32_t inPlace = 0, mirrored = 0;
+    for(idx = 0; idx < size; idx++) {
+        if(sortedLines[idx] == &lines[idx]) {
+            inPlace++;
+        }
+        if(sortedLines[idx] == &lines[size-idx-1]) {
+            mirrored++;
+        }
+    }
+
+    if(inPlace == size) {
+        return 0; // in normal order
+    }
+    if(mirrored == size) {
+        return 1; // in reversed order
+    }
+    return -1; // unknown order
+}
+
+
+// Check whether the secondaries are in the straight or reversed order.
+// Three probes that place U+00E0 in the last, middle and first slot (the other
+// slots hold 'a', slots are joined by separatorChar) are sorted with gComparer
+// and the sorted order is compared with the order they are listed in.
+// Returns 0 if the order is unchanged, 1 if it is exactly reversed, -1 otherwise.
+int32_t
+checkSecondaryOrdering(void) {
+    UChar probes[][5] = {
+        { 0x0061, separatorChar, 0x0061, separatorChar, 0x00E0 }, // a|a|\u00E0
+        { 0x0061, separatorChar, 0x00E0, separatorChar, 0x0061 }, // a|\u00E0|a
+        { 0x00E0, separatorChar, 0x0061, separatorChar, 0x0061 }, // \u00E0|a|a
+    };
+    const int32_t size = sizeof(probes)/sizeof(probes[0]);
+
+    Line lines[size];
+    Line *sortedLines[size];
+    int32_t idx = 0;
+
+    for(idx = 0; idx < size; idx++) {
+        lines[idx].setName(probes[idx], 5);
+    }
+    setArray(sortedLines, lines, size);
+    qsort(sortedLines, size, sizeof(Line*), gComparer);
+
+    // Count positions that match the listed order and the mirrored order.
+    int32_t inPlace = 0, mirrored = 0;
+    for(idx = 0; idx < size; idx++) {
+        if(sortedLines[idx] == &lines[idx]) {
+            inPlace++;
+        }
+        if(sortedLines[idx] == &lines[size-idx-1]) {
+            mirrored++;
+        }
+    }
+
+    if(inPlace == size) {
+        return 0; // in normal order
+    }
+    if(mirrored == size) {
+        return 1; // in reversed order
+    }
+    return -1; // unknown order
+}
+
+// We have to remove ignorable characters from the exemplar set,
+// otherwise, we get messed up results.
+//
+// Every item of exemplarUSet (string or single code point) is probed against an
+// empty line with probeStrength(). Items reported as UCOL_IDENTICAL are
+// collected as ignorables; items whose reported strength is greater than
+// UCOL_PRIMARY are collected as primary ignorables. Both groups are removed
+// from exemplarUSet and, when non-empty, printed to `log`.
+// `status` is accepted for symmetry with the other helpers but not touched here.
+void removeIgnorableChars(UnicodeSet &exemplarUSet, CompareFn comparer, UErrorCode &status) {
+    UnicodeSet ignorables, primaryIgnorables;
+    UnicodeSetIterator exemplarUSetIter(exemplarUSet);
+    exemplarUSetIter.reset();
+    Line empty;
+    Line *emptyP = &empty;
+    Line current;
+    Line *currLine = &current;
+    UColAttributeValue strength = UCOL_OFF;
+
+    while(exemplarUSetIter.next()) {
+        if(exemplarUSetIter.isString()) { // process a string
+            u_memcpy(currLine->name, exemplarUSetIter.getString().getBuffer(), exemplarUSetIter.getString().length());
+            currLine->len = exemplarUSetIter.getString().length();
+            strength = probeStrength(&emptyP, &currLine, comparer);
+            if(strength == UCOL_IDENTICAL) {
+                ignorables.add(exemplarUSetIter.getString());
+            } else if(strength > UCOL_PRIMARY) {
+                primaryIgnorables.add(exemplarUSetIter.getString());
+            }
+        } else { // process code point
+            UBool isError = FALSE;
+            UChar32 codePoint = exemplarUSetIter.getCodepoint();
+            currLine->len = 0;
+            U16_APPEND(currLine->name, currLine->len, 25, codePoint, isError);
+            strength = probeStrength(&emptyP, &currLine, comparer);
+            if(strength == UCOL_IDENTICAL) {
+                ignorables.add(codePoint);
+            } else if(strength > UCOL_PRIMARY) {
+                primaryIgnorables.add(codePoint);
+            }
+        }
+    }
+
+    exemplarUSet.removeAll(ignorables);
+    exemplarUSet.removeAll(primaryIgnorables);
+
+    // The patterns are printed with an explicit length, so no terminator is
+    // needed. The former setCharAt(length(), 0) call was out of range, which
+    // UnicodeString pins to the last valid offset - it replaced the final
+    // character of the pattern with NUL instead of terminating the string.
+    UnicodeString removedPattern;
+    if(ignorables.size()) {
+        u_fprintf(log, "Ignorables:\n");
+        ignorables.toPattern(removedPattern, TRUE);
+        escapeString(removedPattern.getBuffer(), removedPattern.length(), log);
+        u_fprintf(log, "\n");
+    }
+    if(primaryIgnorables.size()) {
+        u_fprintf(log, "Primary ignorables:\n");
+        primaryIgnorables.toPattern(removedPattern, TRUE);
+        escapeString(removedPattern.getBuffer(), removedPattern.length(), log);
+        u_fprintf(log, "\n");
+    }
+
+}
+
+// TODO: develop logic for choosing boundary characters - right now it is hardcoded
+// It should be a function of used scripts. Also, check whether we need to save
+// used script names
+//
+// Adds a fixed list of helper code points to exemplarUSet. `status` is not used.
+void addUtilityChars(UnicodeSet &exemplarUSet, UErrorCode &status) {
+    static const UChar32 utilityChars[] = {
+        // In order to get nice rules, we need some characters in the starting
+        // set that are parts of compatibility composed characters, such as
+        // L-middle dot (middle dot is 0x00B7). Without them we would get a reset
+        // at a funky character, such as L-middle dot. This list will probably grow.
+        0x00B7,
+
+        // These represent a script before the target script and a script after.
+        // More logic should be added so that they are chosen automatically.
+        0x0038,
+        0x0039,
+        //0x0433,
+        //0x0436,
+        0xfa29,
+        0xfa28
+    };
+    const int32_t utilityCount = sizeof(utilityChars)/sizeof(utilityChars[0]);
+
+    for(int32_t n = 0; n < utilityCount; n++) {
+        exemplarUSet.add(utilityChars[n]);
+    }
+}
+
+void
+getExemplars(const char *locale, UnicodeSet &exemplars, UErrorCode &status) {
+    // Replaces the contents of `exemplars` with the set described by the
+    // "ExemplarCharacters" pattern of the locale's resource bundle.
+    UResourceBundle *bundle = ures_open(NULL, locale, &status);
+    UnicodeString exemplarPattern = ures_getUnicodeStringByKey(bundle, "ExemplarCharacters", &status);
+
+    exemplars.clear();
+    exemplars.applyPattern(exemplarPattern, status);
+
+    ures_close(bundle);
+}
+
+// Turns the exemplar set into the starting set for probing.
+//  - Records the script of the exemplar characters (one script per pass; all
+//    characters of that script are removed from the working set each pass).
+//  - Rebuilds the set either from the original exemplar pattern closed over
+//    case (gExemplar) or from U+0041..U+005B plus all recorded scripts.
+//  - Removes ignorable characters and adds the utility characters.
+// exemplarUSet is modified in place.
+void
+prepareStartingSet(UnicodeSet &exemplarUSet, CompareFn comparer, UErrorCode &status) {
+    int32_t i = 0;
+    UnicodeString exemplarString;
+    exemplarUSet.toPattern(exemplarString);
+    // Produce case closure of exemplar characters
+    // Then we want to figure out what is the script of the exemplar characters
+    // just pick several and see their script
+    const char* usedScriptNames[USCRIPT_CODE_LIMIT];
+    int32_t numberOfUsedScripts = 0;
+    char scriptSetPattern[256];
+    UnicodeString pattern; // for debugging
+    UChar32 exChar = -1;
+    // The loop is bounded by the capacity of usedScriptNames and stops on error:
+    // with a failed status the script set below stays empty, nothing would be
+    // removed and the loop would never terminate.
+    while(U_SUCCESS(status) && numberOfUsedScripts < USCRIPT_CODE_LIMIT &&
+          exemplarUSet.size() != 0 && (exChar = exemplarUSet.charAt(0)) != -1) {
+        int32_t scriptNo = u_getIntPropertyValue(exChar, UCHAR_SCRIPT);
+        const char *scriptName = u_getPropertyValueName(UCHAR_SCRIPT, scriptNo, U_SHORT_PROPERTY_NAME);
+        if(scriptName == NULL) {
+            // no usable name for this script: drop the character and move on
+            exemplarUSet.remove(exChar);
+            continue;
+        }
+        usedScriptNames[numberOfUsedScripts] = scriptName;
+        sprintf(scriptSetPattern, "[:%s:]", usedScriptNames[numberOfUsedScripts]);
+        numberOfUsedScripts++;
+        UnicodeSet scriptSet(UnicodeString(scriptSetPattern, ""), status);
+        exemplarUSet.removeAll(scriptSet);
+        // the probed character belongs to scriptSet; removing it explicitly
+        // guarantees progress even if the set could not be built
+        exemplarUSet.remove(exChar);
+        exemplarUSet.toPattern(pattern, TRUE);
+    }
+    exemplarUSet.clear();
+
+    // always add ASCII
+    //exemplarUSet.addAll(UnicodeSet(UnicodeString("[\\u0020-\\u007f]", ""), status));
+    exemplarUSet.addAll(UnicodeSet(UnicodeString("[\\u0041-\\u005b]", ""), status));
+    if(gExemplar) {
+        // NOTE(review): applyPattern replaces the set contents, so the range added
+        // just above does not survive in this branch - confirm this is intended.
+        exemplarUSet.applyPattern(exemplarString, status);
+        exemplarUSet.closeOver(USET_CASE);
+        if(!gQuiet) {
+            u_fprintf(out, "ICU exemplar characters:\n");
+            escapeString(exemplarString.getBuffer(), exemplarString.length(), out);
+            u_fprintf(out, "\n");
+        }
+    } else {
+        if(!gQuiet) {
+            u_fprintf(out, "Using scripts:\n");
+        }
+        // add interesting scripts
+        for(i = 0; i < numberOfUsedScripts; i++) {
+            sprintf(scriptSetPattern, "[:%s:]", usedScriptNames[i]);
+            exemplarUSet.addAll(UnicodeSet(UnicodeString(scriptSetPattern, ""), status));
+            if(!gQuiet) {
+                u_fprintf(out, "%s\n", scriptSetPattern);
+            }
+        }
+    }
+
+
+    removeIgnorableChars(exemplarUSet, comparer, status);
+
+    addUtilityChars(exemplarUSet, status);
+
+/*
+    // try to check whether tailored set and exemplar characters match.
+    USet *tailored = ucol_getTailoredSet(gCol, &status);
+    UBool tailoredContained = exemplarUSet.containsAll(*((UnicodeSet *)tailored));
+    if(!tailoredContained) {
+        ((UnicodeSet *)tailored)->removeAll(exemplarUSet);
+        UnicodeString pattern;
+        ((UnicodeSet *)tailored)->toPattern(pattern, TRUE);
+    }
+    uset_close(tailored);
+*/
+
+    //return exemplarUSet;
+}
+
+// Opens the global output file `out` as UTF-16.
+// The file name is <name>, followed by "_<platform>" for every selected
+// platform, then "_exemplar" or "_script" depending on gExemplar, then
+// ".utf16.txt". Over-long names are truncated to fit the local buffer instead
+// of overflowing it.
+// If the file cannot be opened and `status` does not already hold an error,
+// `status` is set to U_FILE_ACCESS_ERROR.
+void
+setOutputFile(const char *name, UErrorCode &status) {
+    int32_t i = 0;
+    char filename[256];
+
+    // strncat always terminates; the remaining capacity is recomputed each time
+    filename[0] = 0;
+    strncat(filename, name, sizeof(filename) - 1);
+    for(i = 0; i < gPlatformNo; i++) {
+        strncat(filename, "_", sizeof(filename) - strlen(filename) - 1);
+        strncat(filename, platforms[gPlatformIndexes[i]].name, sizeof(filename) - strlen(filename) - 1);
+    }
+    strncat(filename, gExemplar ? "_exemplar" : "_script", sizeof(filename) - strlen(filename) - 1);
+    strncat(filename, ".utf16.txt", sizeof(filename) - strlen(filename) - 1);
+
+    out = u_fopen(filename, "wb", "en", "utf-16");
+    if(out == NULL && U_SUCCESS(status)) {
+        status = U_FILE_ACCESS_ERROR;
+    }
+}
+
+// Runs the complete probe for one collator on every selected platform.
+// Takes ownership of `col`: it is stored in gCol and closed before returning.
+// The global output file `out` is also flushed and closed here.
+//
+// For every platform: prepare the starting set, detect whether secondaries and
+// case are reversed (switching the matching attribute off when the platform
+// comparer is ICUstrcmp), fill one Line per set item and hand them to
+// constructAndAnalyze().
+void
+processCollator(UCollator *col, UErrorCode &status) {
+    int32_t i = 0;
+    gCol = col;
+    UChar ruleString[16384];
+    const int32_t ruleCapacity = (int32_t)(sizeof(ruleString)/sizeof(ruleString[0]));
+    int32_t ruleStringLength = ucol_getRulesEx(gCol, UCOL_TAILORING_ONLY, ruleString, ruleCapacity);
+    // ucol_getRulesEx reports the length needed, which can exceed what was stored
+    if(ruleStringLength > ruleCapacity) {
+        ruleStringLength = ruleCapacity;
+    }
+    if(!gQuiet) {
+        u_fprintf(out, "ICU rules:\n");
+        printRules(ruleString, ruleStringLength, out);
+        printRules(ruleString, ruleStringLength, log);
+        //escapeString(ruleString, ruleStringLength, out);
+        u_fprintf(out, "\n");
+    }
+    const char *locale = ucol_getLocale(gCol, ULOC_REQUESTED_LOCALE, &status);
+    UnicodeSet exemplarUSet;
+    if(locale) {
+        getExemplars(locale, exemplarUSet, status);
+    } else {
+        // No locale (e.g. a collator built from rules): start from the tailored
+        // set. The returned USet is owned by the caller, so copy it and close it.
+        USet *tailored = ucol_getTailoredSet(gCol, &status);
+        if(tailored != NULL) {
+            exemplarUSet = *((UnicodeSet *)tailored);
+            uset_close(tailored);
+        }
+    }
+
+
+    for(i = 0; i < gPlatformNo; i++) {
+        u_fprintf(out, "\nGenerating order for platform: %s\n", platforms[gPlatformIndexes[i]].name);
+        gComparer = platforms[gPlatformIndexes[i]].comparer;
+
+        prepareStartingSet(exemplarUSet, gComparer, status);
+        // get the number of all the items from the set (both codepoints and strings)
+        int32_t exemplarSetSize = exemplarUSet.size();
+        UnicodeSetIterator exemplarUSetIter(exemplarUSet);
+
+        // allocate ICU lines
+        gICULines = new Line*[exemplarSetSize*5];
+        int32_t linesCount = 0;
+        Line *lines = new Line[exemplarSetSize];
+
+        int32_t reversedSecondary = checkSecondaryOrdering();
+        if(reversedSecondary == 0) {
+            u_fprintf(out, "Secondaries do not seem to be reversed\n");
+        } else if(reversedSecondary == 1) {
+            u_fprintf(out, "Secondaries are reversed\n");
+            if(gComparer == ICUstrcmp) {
+                ucol_setAttribute(gCol, UCOL_FRENCH_COLLATION, UCOL_OFF, &status);
+            }
+        } else {
+            u_fprintf(out, "Cannot conclude if secondaries are reversed\n");
+        }
+
+        int32_t reversedCase = checkCaseOrdering();
+        if(reversedCase == 0) {
+            u_fprintf(out, "Case does not seem to be reversed\n");
+        } else if(reversedCase == 1) {
+            u_fprintf(out, "Case is reversed\n");
+            if(gComparer == ICUstrcmp) {
+                ucol_setAttribute(gCol, UCOL_CASE_FIRST, UCOL_OFF, &status);
+            }
+        } else {
+            u_fprintf(out, "Cannot conclude if case is reversed\n");
+        }
+
+        exemplarUSetIter.reset();
+        gElements.removeAll();
+        gExpansions.removeAll();
+        linesCount = 0;
+
+        // one Line per item of the starting set
+        while(exemplarUSetIter.next()) {
+            Line *currLine = lines+linesCount;
+            if(exemplarUSetIter.isString()) { // process a string
+                u_memcpy(currLine->name, exemplarUSetIter.getString().getBuffer(), exemplarUSetIter.getString().length());
+                currLine->len = exemplarUSetIter.getString().length();
+            } else { // process code point
+                UBool isError = FALSE;
+                currLine->len = 0;
+                U16_APPEND(currLine->name, currLine->len, 25, exemplarUSetIter.getCodepoint(), isError);
+            }
+            currLine->name[currLine->len] = 0; // zero terminate, for our evil ways
+            currLine->index = linesCount;
+            linesCount++;
+            noteElement(currLine);
+        }
+        constructAndAnalyze(gICULines, lines, exemplarSetSize, gComparer);
+
+        delete[] lines;
+        // gICULines is allocated once per platform, so it has to be released once
+        // per platform as well; freeing only after the loop leaked all but the last.
+        delete[] gICULines;
+        gICULines = NULL;
+    }
+
+
+    // cleanup globals
+    u_fflush(out);
+    u_fclose(out);
+    ucol_close(gCol);
+}
+
+void
+processLocale(const char *locale, UErrorCode &status) {
+    // Probes a single locale: remembers its Windows LCID, opens its collator and
+    // its output file, writes the header line and runs processCollator().
+    gWinLCID = uloc_getLCID(locale);
+    UCollator *localeCollator = ucol_open(locale, &status);
+
+    // `out` is (re)opened by setOutputFile, so the header must come after it
+    setOutputFile(locale, status);
+    u_fprintf(out, "Locale %s (LCID:%06X)\n", locale, gWinLCID);
+
+    processCollator(localeCollator, status);
+}
+
+UBool
+hasCollationElements(const char *locName) {
+    // Returns TRUE only when looking up "CollationElements" in the locale's
+    // bundle finishes with exactly U_ZERO_ERROR; any warning or error code from
+    // the lookup gives FALSE.
+    UErrorCode status = U_ZERO_ERROR;
+    UResourceBundle *bundle = ures_open(NULL, locName, &status);
+    if(U_FAILURE(status)) {
+        return FALSE;
+    }
+
+    // forget any warning from opening the bundle; only the lookup result counts
+    status = U_ZERO_ERROR;
+    UResourceBundle *collation = ures_getByKey(bundle, "CollationElements", NULL, &status);
+    const UBool found = (UBool)(status == U_ZERO_ERROR); /* do the test - there are real elements */
+
+    ures_close(collation);
+    ures_close(bundle);
+    return found;
+}
+
+int
+main(int argc,
+ char* argv[])
+{
+ UErrorCode status = U_ZERO_ERROR;
+ err = u_finit(stderr, "en", "latin-1");
+ log = u_finit(stdout, "en", "latin-1");
+
+/*
+ USet *wsp = uprv_openRuleWhiteSpaceSet(&status);
+ uset_add(wsp, 0x0041);
+ uset_remove(wsp, 0x0041);
+ UnicodeString pat;
+ ((UnicodeSet *)wsp)->toPattern(pat, TRUE);
+ pat.setCharAt(pat.length(), 0);
+ escapeString(pat.getBuffer(), pat.length(), log);
+ u_fflush(log);
+*/
+
+ UTransliterator *anyHex = utrans_open("[^\\u000a\\u0020-\\u007f] Any-Hex/Java", UTRANS_FORWARD, NULL, 0, NULL, &status);
+ u_fsettransliterator(log, U_WRITE, anyHex, &status);
+
+ processArgs(argc, argv, status);
+ int32_t i = 0;
+
+
+ gElements.setValueDeleter(deleteLineElement);
+
+
+ if(U_FAILURE(status) || gPlatformNo == 0) {
+ return -1;
+ }
+
+ gUCA = ucol_open("root", &status);
+
+ if(gRulesStdin) {
+ char buffer[1024];
+ UChar ruleBuffer[16384];
+ UChar *rules = ruleBuffer;
+ int32_t maxRuleLen = 16384;
+ int32_t rLen = 0;
+ while(gets(buffer)) {
+ if(buffer[0] != '/' && buffer[1] != '/') {
+ rLen = u_unescape(buffer, rules, maxRuleLen);
+ rules += rLen;
+ maxRuleLen -= rLen;
+ }
+ }
+ UParseError parseError;
+ //escapeString(ruleBuffer, rules-ruleBuffer, log);//
+ u_fprintf(log, "%U\n", ruleBuffer);
+
+ UCollator *col = ucol_openRules(ruleBuffer, rules-ruleBuffer, UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
+ if(U_SUCCESS(status)) {
+ setOutputFile("stdinRules", status);
+ processCollator(col, status);
+ } else {
+ u_fprintf(err, "Error %s\n", u_errorName(status));
+ }
+ } else {
+
+ if(gLocale) {
+ processLocale(gLocale, status);
+ } else if(gLocaleNo) {
+ for(i = 0; i < gLocaleNo; i++) {
+ processLocale(gLocales[i], status);
+ }
+ } else { // do the loop through all the locales
+ int32_t noOfLoc = uloc_countAvailable();
+ const char *locName = NULL;
+ for(i = 0; i<noOfLoc; i++) {
+ status = U_ZERO_ERROR;
+ locName = uloc_getAvailable(i);
+ if(hasCollationElements(locName)) {
+ processLocale(locName, status);
+ }
+ }
+ }
+ }
+
+
+ ucol_close(gUCA);
+
+ u_fflush(log);
+ u_fclose(log);
+ u_fflush(err);
+ u_fclose(err);
+
+ return 0;
+}
\ No newline at end of file
diff --git a/colprobe/colprobe.dsp b/colprobe/colprobe.dsp
new file mode 100755
index 0000000..d274c85
--- /dev/null
+++ b/colprobe/colprobe.dsp
@@ -0,0 +1,148 @@
+# Microsoft Developer Studio Project File - Name="colprobe" - Package Owner=<4>
+# Microsoft Developer Studio Generated Build File, Format Version 6.00
+# ** DO NOT EDIT **
+
+# TARGTYPE "Win32 (x86) Console Application" 0x0103
+
+CFG=colprobe - Win32 Debug
+!MESSAGE This is not a valid makefile. To build this project using NMAKE,
+!MESSAGE use the Export Makefile command and run
+!MESSAGE
+!MESSAGE NMAKE /f "colprobe.mak".
+!MESSAGE
+!MESSAGE You can specify a configuration when running NMAKE
+!MESSAGE by defining the macro CFG on the command line. For example:
+!MESSAGE
+!MESSAGE NMAKE /f "colprobe.mak" CFG="colprobe - Win32 Debug"
+!MESSAGE
+!MESSAGE Possible choices for configuration are:
+!MESSAGE
+!MESSAGE "colprobe - Win32 Release" (based on "Win32 (x86) Console Application")
+!MESSAGE "colprobe - Win32 Debug" (based on "Win32 (x86) Console Application")
+!MESSAGE
+
+# Begin Project
+# PROP AllowPerConfigDependencies 0
+# PROP Scc_ProjName ""
+# PROP Scc_LocalPath ""
+CPP=cl.exe
+RSC=rc.exe
+
+!IF "$(CFG)" == "colprobe - Win32 Release"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 0
+# PROP BASE Output_Dir "Release"
+# PROP BASE Intermediate_Dir "Release"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 0
+# PROP Output_Dir "Release"
+# PROP Intermediate_Dir "Release"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+MTL=midl.exe
+# ADD BASE CPP /nologo /W3 /GX /O2 /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# ADD CPP /nologo /W3 /GX /O2 /I "../../../include" /I "../../tools/toolutil" /I "../../common" /I "../../i18n" /D "WIN32" /D "NDEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+# ADD BASE RSC /l 0x409 /d "NDEBUG"
+# ADD RSC /l 0x409 /d "NDEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386
+# ADD LINK32 icuio.lib icuuc.lib icuin.lib icutu.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /machine:I386 /libpath:"../../../lib"
+
+!ELSEIF "$(CFG)" == "colprobe - Win32 Debug"
+
+# PROP BASE Use_MFC 0
+# PROP BASE Use_Debug_Libraries 1
+# PROP BASE Output_Dir "Debug"
+# PROP BASE Intermediate_Dir "Debug"
+# PROP BASE Target_Dir ""
+# PROP Use_MFC 0
+# PROP Use_Debug_Libraries 1
+# PROP Output_Dir "Debug"
+# PROP Intermediate_Dir "Debug"
+# PROP Ignore_Export_Lib 0
+# PROP Target_Dir ""
+MTL=midl.exe
+# ADD BASE CPP /nologo /W3 /Gm /GX /ZI /Od /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
+# ADD CPP /nologo /W3 /Gm /GX /ZI /Od /I "../../../include" /I "../../tools/toolutil" /I "../../common" /I "../../i18n" /D "WIN32" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
+# ADD BASE RSC /l 0x409 /d "_DEBUG"
+# ADD RSC /l 0x409 /d "_DEBUG"
+BSC32=bscmake.exe
+# ADD BASE BSC32 /nologo
+# ADD BSC32 /nologo
+LINK32=link.exe
+# ADD BASE LINK32 kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept
+# ADD LINK32 icuiod.lib icuucd.lib icuind.lib icutud.lib kernel32.lib user32.lib gdi32.lib winspool.lib comdlg32.lib advapi32.lib shell32.lib ole32.lib oleaut32.lib uuid.lib odbc32.lib odbccp32.lib /nologo /subsystem:console /debug /machine:I386 /pdbtype:sept /libpath:"../../../lib"
+
+!ENDIF
+
+# Begin Target
+
+# Name "colprobe - Win32 Release"
+# Name "colprobe - Win32 Debug"
+# Begin Group "Source Files"
+
+# PROP Default_Filter "cpp;c;cxx;rc;def;r;odl;idl;hpj;bat"
+# Begin Source File
+
+SOURCE=.\colprobeNew.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\line.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\sortedlines.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\strengthprobe.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\targetsetgenerator.cpp
+# End Source File
+# Begin Source File
+
+SOURCE=.\uprinter.cpp
+# End Source File
+# End Group
+# Begin Group "Header Files"
+
+# PROP Default_Filter "h;hpp;hxx;hm;inl"
+# Begin Source File
+
+SOURCE=.\colprobe.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\line.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\sortedlines.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\strengthprobe.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\targetsetgenerator.h
+# End Source File
+# Begin Source File
+
+SOURCE=.\uprinter.h
+# End Source File
+# End Group
+# Begin Group "Resource Files"
+
+# PROP Default_Filter "ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe"
+# End Group
+# End Target
+# End Project
diff --git a/colprobe/colprobe.h b/colprobe/colprobe.h
new file mode 100755
index 0000000..ca12c44
--- /dev/null
+++ b/colprobe/colprobe.h
@@ -0,0 +1,15 @@
+// Shared declarations for the colprobe tool.
+#ifndef COLPROBE_H
+#define COLPROBE_H
+
+#include "unicode/uniset.h"
+#include "unicode/normlzr.h"
+
+// Comparison callback with the signature expected by qsort().
+typedef int (*CompareFn) (const void *elem1, const void *elem2);
+// Callback taking a UChar string with its length and an output byte buffer with
+// its capacity - presumably writes a sort key and returns its size; confirm
+// against the implementations.
+typedef int (*GetSortKeyFn) (const UChar *string, int32_t len, uint8_t *buffer, int32_t buffCapacity);
+//typedef int (__cdecl *CompareFn)(const void *elem1, const void *elem2);
+// Fills `rep` for the given locale and reports through `hanAppears` and `status`.
+// NOTE(review): exact semantics are defined by the implementation, not visible here.
+void generateRepertoire(const char *locale, UnicodeSet &rep, UBool &hanAppears, UErrorCode &status);
+// Returns a UnicodeSet derived from `source`; see the implementation for what
+// "flatten" means exactly.
+UnicodeSet flatten(const UnicodeSet &source, UErrorCode &status);
+
+//UnicodeSet generateRepertoire(const char *locale);
+
+#endif
diff --git a/colprobe/colprobeNew.cpp b/colprobe/colprobeNew.cpp
new file mode 100755
index 0000000..6bcfc51
--- /dev/null
+++ b/colprobe/colprobeNew.cpp
@@ -0,0 +1,1078 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File colprobe.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 03/18/2003 weiv Creation.
+*******************************************************************************
+*/
+
+#include "uoptions.h"
+#include "unicode/ucol.h"
+#include "unicode/ucoleitr.h"
+#include "unicode/ures.h"
+#include "unicode/uniset.h"
+#include "unicode/usetiter.h"
+#include "unicode/ustring.h"
+#include "unicode/uchar.h"
+#include "unicode/uscript.h"
+#include "unicode/locid.h"
+#include "unicode/ucnv.h"
+#include "uprops.h"
+#include "hash.h"
+#include "ucol_imp.h"
+
+#include "unicode/ustdio.h"
+#include "unicode/utrans.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+
+// unix tolower
+#include <ctype.h>
+// unix setlocale
+#include <locale.h>
+
+#include "colprobe.h"
+
+#include "line.h"
+#include "sortedlines.h"
+#include "strengthprobe.h"
+
+void testWin(StrengthProbe &probe, UErrorCode &status) ;
+
+#if defined WIN32
+#include <io.h>
+#include <windows.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <direct.h>
+
+/*
+ * Makes sure a directory called dirName exists (Windows flavour).
+ * Returns the result of _mkdir() when nothing exists under that name,
+ * 0 when a directory is already there, and 1 when the name is taken by
+ * something that is not a directory.
+ */
+int createDir(const char* dirName) {
+    struct _stat info;
+
+    if(_stat(dirName, &info) == -1) {
+        // nothing there yet - try to create it and report how that went
+        return _mkdir(dirName);
+    }
+    return (info.st_mode & _S_IFDIR) ? 0 : 1;
+}
+
+//#elif defined POSIX
+#else
+#include <sys/stat.h>
+#include <unistd.h>
+
+/*
+ * Makes sure a directory called dirName exists (POSIX flavour).
+ * Returns the result of mkdir() (mode rwx for user, group and others) when
+ * nothing exists under that name, 0 when a directory is already there, and
+ * 1 when the name is taken by something that is not a directory.
+ */
+int createDir(const char* dirName) {
+    struct stat info;
+
+    if(stat(dirName, &info) == -1) {
+        // nothing there yet - try to create it and report how that went
+        return mkdir(dirName, S_IRWXU|S_IRWXG|S_IRWXO);
+    }
+    return S_ISDIR(info.st_mode) ? 0 : 1;
+}
+//
+// Stubs for Windows API functions when building on UNIXes.
+//
+// Stand-in for the Windows DWORD type so the globals below compile on UNIX.
+typedef int DWORD;
+// No-op replacement for the Windows API call; always returns 0.
+inline int CompareStringW(DWORD, DWORD, UChar *, int, UChar *, int) {return 0;};
+//#else
+//#error "Not POSIX or Windows. Won't work."
+#endif
+
+#include "line.h"
+
+// Flags set from the command line in processArgs().
+static UBool gVerbose = FALSE;
+static UBool gDebug = FALSE;
+static UBool gQuiet = FALSE;
+static UBool gExemplar = FALSE;
+
+// Windows locale id for the current locale (set in setLocale/setReference).
+DWORD gWinLCID;
+// Number of comparisons performed; incremented by the comparator functions.
+int gCount;
+// Collator currently being probed; opened/closed by setLocale/setReference.
+UCollator *gCol;
+// Root collator, opened in main().
+UCollator *gUCA;
+// UTF-8 converter used by the UNIX comparison functions.
+UConverter *utf8cnv;
+// Comparison function of the platform being probed.
+CompareFn gComparer;
+// Index into platforms[] of the reference platform.
+int gRefNum;
+// Sets built from the --excludeset and --repertoire options.
+UnicodeSet gExcludeSet;
+UnicodeSet gRepertoire;
+
+// U+0030 (digit zero), used as the separator between probe characters.
+const UChar separatorChar = 0x0030;
+
+// Output sinks: logger and debug are created in main(), the bundles in setFiles().
+UPrinter *logger;
+UPrinter *debug;
+UPrinter *tailoringBundle;
+UPrinter *referenceBundle;
+UPrinter *bundle;
+// Dump files opened in setFiles().
+FILE *fTailoringDump;
+FILE *fDefaultDump;
+
+const char *progName = "colprobe";
+
+// Locale to process; NULL means "not given".
+const char *gLocale = NULL;
+int32_t platformIndex = -1;
+// Number of used entries in gPlatformIndexes (filled by addPlatform()).
+int32_t gPlatformNo = 0;
+int32_t gPlatformIndexes[10];
+// List of locales; the code that would fill it is commented out in processArgs().
+int32_t gLocaleNo = 0;
+const char* gLocales[100];
+// TRUE when rules are to be read from stdin (--rulesstdin).
+UBool gRulesStdin = FALSE;
+// Output format name and the matching file extension (see --output).
+const char *outputFormat = "HTML";
+const char *outExtension = "html";
+
+// Symbolic indexes into options[]; the order must match that array.
+enum {
+ HELP1,
+ HELP2,
+ VERBOSE,
+ QUIET,
+ VERSION,
+ ICUDATADIR,
+ COPYRIGHT,
+ LOCALE,
+ PLATFORM,
+ DEBUG,
+ EXEMPLAR,
+ RULESSTDIN,
+ REFERENCE,
+ EXCLUDESET,
+ REPERTOIRE,
+ INTERACTIVE,
+ PRINTREF,
+ DIFF,
+ OUTPUT
+};
+
+// Command line option table handed to u_parseArgs(); entries are addressed
+// through the enum above, so the numbered positions must stay in sync with it.
+UOption options[]={
+ /*0*/ UOPTION_HELP_H,
+ /*1*/ UOPTION_HELP_QUESTION_MARK,
+ /*2*/ UOPTION_VERBOSE,
+ /*3*/ UOPTION_QUIET,
+ /*4*/ UOPTION_VERSION,
+ /*5*/ UOPTION_ICUDATADIR,
+ /*6*/ UOPTION_COPYRIGHT,
+ /*7*/ UOPTION_DEF("locale", 'l', UOPT_REQUIRES_ARG),
+ /*8*/ UOPTION_DEF("platform", 'p', UOPT_REQUIRES_ARG),
+ /*9*/ UOPTION_DEF("debug", 'D', UOPT_NO_ARG),
+ /*10*/ UOPTION_DEF("exemplar", 'E', UOPT_NO_ARG),
+ /*11*/ UOPTION_DEF("rulesstdin", 'R', UOPT_NO_ARG),
+ /*12*/ UOPTION_DEF("ref", 'c', UOPT_REQUIRES_ARG),
+ /*13*/ UOPTION_DEF("excludeset", 'x', UOPT_REQUIRES_ARG),
+ /*14*/ UOPTION_DEF("repertoire", 't', UOPT_REQUIRES_ARG),
+ /*15*/ UOPTION_DEF("interactive", 'I', UOPT_NO_ARG),
+ /*16*/ UOPTION_DEF("printref", 0, UOPT_NO_ARG),
+ /*17*/ UOPTION_DEF("diff", 0, UOPT_NO_ARG),
+ /*18*/ UOPTION_DEF("output", 0, UOPT_REQUIRES_ARG)
+};
+
+// Scratch buffers shared by the comparison/sort-key functions below:
+// NFC-normalized UTF-16 text of the two operands and its length...
+UChar compA[256];
+UChar compB[256];
+int32_t compALen = 0;
+int32_t compBLen = 0;
+
+// ...and the same text converted with utf8cnv for the C library calls.
+char compUTF8A[256];
+char compUTF8B[256];
+int32_t compUTF8ALen = 0;
+int32_t compUTF8BLen = 0;
+
+/*
+ * qsort-style comparator that orders two Line objects with the C library's
+ * strcoll().
+ *
+ * a and b point to Line* elements. Each name is NFC-normalized into
+ * compA/compB, converted with utf8cnv into compUTF8A/compUTF8B and the two
+ * byte strings are compared with strcoll(). Increments gCount.
+ *
+ * One byte of each UTF-8 buffer is reserved for the terminator and the
+ * reported length is clamped, so the terminating NUL is always written inside
+ * the buffer even when the converter reports an overflow.
+ */
+int UNIXstrcmp(const void *a, const void *b) {
+    UErrorCode status = U_ZERO_ERROR;
+    const int32_t utf8Capacity = (int32_t)sizeof(compUTF8A) - 1;
+    Line *lineA = *(Line **)a;
+    Line *lineB = *(Line **)b;
+
+    gCount++;
+    compALen = unorm_normalize(lineA->name, lineA->len, UNORM_NFC, 0, compA, 256, &status);
+    compBLen = unorm_normalize(lineB->name, lineB->len, UNORM_NFC, 0, compB, 256, &status);
+
+    compUTF8ALen = ucnv_fromUChars(utf8cnv, compUTF8A, utf8Capacity, compA, compALen, &status);
+    if(compUTF8ALen > utf8Capacity) {
+        // on overflow the converter returns the length it would have needed
+        compUTF8ALen = utf8Capacity;
+    }
+    compUTF8A[compUTF8ALen] = 0;
+
+    compUTF8BLen = ucnv_fromUChars(utf8cnv, compUTF8B, utf8Capacity, compB, compBLen, &status);
+    if(compUTF8BLen > utf8Capacity) {
+        compUTF8BLen = utf8Capacity;
+    }
+    compUTF8B[compUTF8BLen] = 0;
+
+    return strcoll(compUTF8A, compUTF8B);
+}
+
+/*
+ * Produces a C library sort key for a UTF-16 string.
+ *
+ * The string is NFC-normalized into compA, converted with utf8cnv into
+ * compUTF8A and passed to strxfrm(), which writes into buffer (at most
+ * buffCapacity bytes). Returns strxfrm()'s result plus one.
+ *
+ * One byte of the UTF-8 buffer is reserved for the terminator and the
+ * reported length is clamped, so the terminating NUL is always written inside
+ * the buffer even when the converter reports an overflow.
+ */
+int UNIXgetSortKey(const UChar *string, int32_t len, uint8_t *buffer, int32_t buffCapacity) {
+    UErrorCode status = U_ZERO_ERROR;
+    const int32_t utf8Capacity = (int32_t)sizeof(compUTF8A) - 1;
+
+    compALen = unorm_normalize(string, len, UNORM_NFC, 0, compA, 256, &status);
+    compUTF8ALen = ucnv_fromUChars(utf8cnv, compUTF8A, utf8Capacity, compA, compALen, &status);
+    if(compUTF8ALen > utf8Capacity) {
+        // on overflow the converter returns the length it would have needed
+        compUTF8ALen = utf8Capacity;
+    }
+    compUTF8A[compUTF8ALen] = 0;
+    return (strxfrm((char *)buffer, compUTF8A, buffCapacity)+1);
+}
+
+#ifdef WIN32
+// qsort-style comparator that orders two Line objects with the Windows
+// CompareStringW() API for the locale in gWinLCID. Both names are
+// NFC-normalized into compA/compB first. Increments gCount.
+int Winstrcmp(const void *a, const void *b) {
+ UErrorCode status = U_ZERO_ERROR;
+ gCount++;
+ int t;
+ //compALen = unorm_compose(compA, 256, (*(Line **)a)->name, (*(Line **)a)->len, FALSE, 0, &status);
+ //compBLen = unorm_compose(compB, 256, (*(Line **)b)->name, (*(Line **)b)->len, FALSE, 0, &status);
+ compALen = unorm_normalize((*(Line **)a)->name, (*(Line **)a)->len, UNORM_NFC, 0, compA, 256, &status);
+ compBLen = unorm_normalize((*(Line **)b)->name, (*(Line **)b)->len, UNORM_NFC, 0, compB, 256, &status);
+ t = CompareStringW(gWinLCID, SORT_STRINGSORT, //0,
+ compA, compALen,
+ compB, compBLen);
+
+/*
+ t = CompareStringW(gWinLCID, 0,
+ (*(Line **)a)->name, (*(Line **)a)->len,
+ (*(Line **)b)->name, (*(Line **)b)->len);
+*/
+ // NOTE(review): presumably shifts the API's 1/2/3 results to -1/0/1 - confirm
+ // against the CompareStringW documentation.
+ return t-2;
+}
+
+// Produces a Windows sort key for a UTF-16 string: the string is
+// NFC-normalized into compA and passed to LCMapStringW() with LCMAP_SORTKEY
+// for the locale in gWinLCID. Returns LCMapStringW()'s result.
+int WingetSortKey(const UChar *string, int32_t len, uint8_t *buffer, int32_t buffCapacity) {
+ UErrorCode status = U_ZERO_ERROR;
+ compALen = unorm_normalize(string, len, UNORM_NFC, 0, compA, 256, &status);
+ return LCMapStringW(gWinLCID, LCMAP_SORTKEY | SORT_STRINGSORT, compA, compALen, (unsigned short *)buffer, buffCapacity);
+}
+
+// Disabled alternative implementation of Winstrcmp() that compares the sort
+// keys of the two lines byte-wise instead of calling CompareStringW().
+#if 0
+int Winstrcmp(const void *a, const void *b) {
+ UErrorCode status = U_ZERO_ERROR;
+ uint8_t b1[256], b2[256];
+ int32_t b1Len, b2Len;
+ b1Len = WingetSortKey((*(Line **)a)->name, (*(Line **)a)->len, b1, 256);
+ b2Len = WingetSortKey((*(Line **)b)->name, (*(Line **)b)->len, b2, 256);
+
+ b1[b1Len] = 0;
+ b2[b2Len] = 0;
+
+ return strcmp((const char *)b1, (const char *)b2);
+}
+#endif
+
+#else
+/*
+ * Placeholder used when not building for Windows: there is no Windows
+ * collation to call, so every pair of elements compares as equal (0).
+ */
+int Winstrcmp(const void *a, const void *b) {
+    // parameters are intentionally unused in this stub
+    (void)a;
+    (void)b;
+    return 0;
+}
+// Placeholder used when not building for Windows: ignores its arguments and
+// returns 0.
+int WingetSortKey(const UChar *, int32_t , uint8_t *, int32_t ) {
+ return 0;
+}
+#endif
+
+/*
+ * qsort-style comparator that orders two Line objects with the ICU collator
+ * in gCol. a and b point to Line* elements. Returns -1, 0 or +1 and
+ * increments gCount.
+ */
+int ICUstrcmp(const void *a, const void *b) {
+    Line *left = *(Line **)a;
+    Line *right = *(Line **)b;
+
+    gCount++;
+    switch(ucol_strcoll(gCol, left->name, left->len, right->name, right->len)) {
+    case UCOL_LESS:
+        return -1;
+    case UCOL_GREATER:
+        return +1;
+    default:
+        return 0;
+    }
+}
+
+// Produces an ICU sort key for a UTF-16 string by forwarding to
+// ucol_getSortKey() with the collator in gCol; returns that call's result.
+int ICUgetSortKey(const UChar *string, int32_t len, uint8_t *buffer, int32_t buffCapacity) {
+ return ucol_getSortKey(gCol, string, len, buffer, buffCapacity);
+}
+
+// Table of platforms that can be probed: the name accepted on the command
+// line together with the comparison and sort-key functions to use for it.
+// Entries are referred to by index (gPlatformIndexes[], gRefNum).
+struct {
+ const char* name;
+ CompareFn comparer;
+ GetSortKeyFn skgetter;
+} platforms[] = {
+ { "icu", ICUstrcmp, ICUgetSortKey },
+ { "w2k", Winstrcmp, WingetSortKey},
+ { "winxp", Winstrcmp, WingetSortKey},
+ { "aix", UNIXstrcmp, UNIXgetSortKey},
+ { "linux", UNIXstrcmp, UNIXgetSortKey}
+};
+
+
+/*
+ * Lower-cases a NUL-terminated C string in place using tolower().
+ *
+ * Each char is converted to unsigned char before the call because passing a
+ * negative value to tolower() is undefined; the string is walked once
+ * instead of calling strlen() on every iteration.
+ */
+void stringToLower(char *string) {
+    for(char *p = string; *p != 0; p++) {
+        *p = (char)tolower((unsigned char)*p);
+    }
+}
+
+// Prints a one-line usage message for the program called 'name' to logger.
+void usage(const char *name) {
+ logger->log("Usage: %s --locale loc_name --platform platform\n", name);
+}
+
+/*
+ * Logs the names of all entries in platforms[], one per line.
+ */
+void listKnownPlatforms() {
+    uint32_t i = 0;
+    logger->log("Known platforms:\n");
+    for(i = 0; i < sizeof(platforms)/sizeof(platforms[0]); i++) {
+        // pass the name string, not the whole struct, to match the %s format
+        logger->log("\t%s\n", platforms[i].name);
+    }
+}
+
+/*
+ * Looks up 'platform' by exact name in platforms[] and appends the index of
+ * every match to gPlatformIndexes. When nothing matches, logs the problem and
+ * the list of known platforms. Entries are only appended while there is room
+ * left in gPlatformIndexes.
+ */
+void addPlatform(const char *platform) {
+    uint32_t i;
+    //stringToLower(platform);
+    const int32_t capacity = (int32_t)(sizeof(gPlatformIndexes)/sizeof(gPlatformIndexes[0]));
+    int32_t oldPlatformNo = gPlatformNo;
+
+    for(i = 0; i < sizeof(platforms)/sizeof(platforms[0]); i++) {
+        if(strcmp(platform, platforms[i].name) == 0) {
+            if(gPlatformNo >= capacity) {
+                // the index table is full - do not write past its end
+                logger->log("Too many platforms, ignoring %s\n", platform);
+                return;
+            }
+            gPlatformIndexes[gPlatformNo++] = i;
+        }
+    }
+    if(gPlatformNo == oldPlatformNo) {
+        logger->log("Unknown platform %s\n", platform);
+        listKnownPlatforms();
+    }
+}
+
+/*
+ * Parses the command line into the global configuration.
+ *
+ * Sets the verbosity flags, gRulesStdin, gLocale, the platform list
+ * (gPlatformIndexes via addPlatform), gRefNum, gExcludeSet, gRepertoire and
+ * the output format/extension.
+ *
+ * argc/argv  command line as received by main(); u_parseArgs() removes the
+ *            recognised options and returns how many arguments are left.
+ * status     set to U_ILLEGAL_ARGUMENT_ERROR for an unknown reference
+ *            platform, or to the pattern error when the exclude set or
+ *            repertoire pattern cannot be applied.
+ *
+ * For an unknown option or a help request only the usage text is printed; no
+ * platform is added in that case.
+ */
+void processArgs(int argc, char* argv[], UErrorCode &status)
+{
+    const int32_t platformCount = (int32_t)(sizeof(platforms)/sizeof(platforms[0]));
+    int32_t i = 0;
+    U_MAIN_INIT_ARGS(argc, argv);
+
+    argc = u_parseArgs(argc, argv, (int32_t)(sizeof(options)/sizeof(options[0])), options);
+
+    if(argc < 0) {
+        logger->log("Unknown option: %s\n", argv[-argc]);
+        usage(progName);
+        return;
+    }
+
+    if(options[0].doesOccur || options[1].doesOccur) {
+        usage(progName);
+        return;
+    }
+    if(options[VERBOSE].doesOccur) {
+        gVerbose = TRUE;
+    }
+    if(options[DEBUG].doesOccur) {
+        gDebug = TRUE;
+        gVerbose = TRUE;
+    }
+    if(options[EXEMPLAR].doesOccur) {
+        gExemplar = TRUE;
+    }
+    if(options[QUIET].doesOccur) {
+        gQuiet = TRUE;
+    }
+
+    // ASCII based options specified on the command line
+    // this is for testing purposes, will allow to load
+    // up ICU rules and then poke through them.
+    // In that case, we test only ICU and don't need
+    // a locale.
+    if(options[RULESSTDIN].doesOccur) {
+        gRulesStdin = TRUE;
+        addPlatform("icu");
+        return;
+    }
+
+    if(options[LOCALE].doesOccur) {
+        gLocale = options[LOCALE].value;
+    } else {
+        // Only take the first leftover argument as the locale when there is
+        // one; slots at or beyond argc may still hold already-parsed options.
+        gLocale = (argc > 1) ? argv[1] : NULL;
+        //for(i = 1; i < argc; i++) {
+        //gLocales[gLocaleNo++] = argv[i];
+        //}
+    }
+
+    if(options[PLATFORM].doesOccur) {
+        addPlatform(options[PLATFORM].value);
+    } else { // there is a list of platforms
+        addPlatform("icu");
+    }
+
+    if(options[REFERENCE].doesOccur) {
+        for(i = 0; i < platformCount; i++) {
+            if(strcmp(options[REFERENCE].value, platforms[i].name) == 0) {
+                gRefNum = i;
+                break;
+            }
+        }
+        if(i == platformCount) {
+            logger->log("Unknown reference %s!\n", options[REFERENCE].value);
+            status = U_ILLEGAL_ARGUMENT_ERROR;
+            return;
+        }
+    } else {
+        gRefNum = 0;
+    }
+
+    if(options[EXCLUDESET].doesOccur) {
+        gExcludeSet.applyPattern(UnicodeString(options[EXCLUDESET].value), status);
+        if(U_FAILURE(status)) {
+            logger->log("Cannot construct exclude set from argument %s. Error %s\n", options[EXCLUDESET].value, u_errorName(status));
+            return;
+        } else {
+            UnicodeString pattern;
+            logger->log(gExcludeSet.toPattern(pattern, TRUE), TRUE);
+        }
+    }
+
+    if(options[REPERTOIRE].doesOccur) {
+        gRepertoire.applyPattern(UnicodeString(options[REPERTOIRE].value), status);
+        if(U_FAILURE(status)) {
+            logger->log("Cannot construct repertoire from argument %s. Error %s\n", options[REPERTOIRE].value, u_errorName(status));
+            return;
+        }
+    }
+
+    if(options[OUTPUT].doesOccur) {
+        outputFormat = options[OUTPUT].value;
+        if(strcmp(outputFormat, "HTML") == 0) {
+            outExtension = "html";
+        } else if(strcmp(outputFormat, "XML") == 0) {
+            outExtension = "xml";
+        } else {
+            outExtension = "txt";
+        }
+    }
+
+}
+
+/*
+ * Checks whether upper case comes before lower case or vice-versa for the
+ * comparison function in gComparer.
+ *
+ * Four three-character strings (letter, separatorChar, letter) are sorted
+ * with gComparer and the result is compared with the order in which they
+ * were listed.
+ *
+ * Returns 0 when the sorted order equals the listed order, 1 when it is the
+ * exact reverse, and -1 otherwise.
+ */
+int32_t
+checkCaseOrdering(void) {
+    UChar stuff[][3] = {
+        { 0x0061, separatorChar, 0x0061 }, // a, separator, a
+        { 0x0061, separatorChar, 0x0041 }, // a, separator, A
+        { 0x0041, separatorChar, 0x0061 }, // A, separator, a
+        { 0x0041, separatorChar, 0x0041 }, // A, separator, A
+        //{ 0x00E0, separatorChar, 0x00E0 } //"\\u00E0\\u00E0"
+    };
+    const int32_t size = sizeof(stuff)/sizeof(stuff[0]);
+
+    Line **sortedLines = new Line*[size];
+    Line lines[size];
+
+    int32_t i = 0;
+    int32_t ordered = 0, reversed = 0;
+
+    for(i = 0; i < size; i++) {
+        lines[i].setName(stuff[i], 3);
+        // the array handed to qsort must point at the lines before sorting
+        sortedLines[i] = &lines[i];
+    }
+    qsort(sortedLines, size, sizeof(Line*), gComparer);
+
+    for(i = 0; i < size; i++) {
+        if(*(sortedLines+i) == &lines[i]) {
+            ordered++;
+        }
+        if(*(sortedLines+i) == &lines[size-i-1]) {
+            reversed++;
+        }
+    }
+
+    delete[] sortedLines;
+    if(ordered == size) {
+        return 0; // in normal order
+    } else if(reversed == size) {
+        return 1; // in reversed order
+    } else {
+        return -1; // unknown order
+    }
+}
+
+// Replaces the contents of 'exemplars' with the set described by the
+// "ExemplarCharacters" resource of 'locale'. Errors from opening the bundle,
+// fetching the string or applying the pattern are reported through 'status'.
+void
+getExemplars(const char *locale, UnicodeSet &exemplars, UErrorCode &status) {
+ // first we fill out structures with exemplar characters.
+ UResourceBundle *res = ures_open(NULL, locale, &status);
+ UnicodeString exemplarString = ures_getUnicodeStringByKey(res, "ExemplarCharacters", &status);
+ // the set is emptied even when the lookups above failed
+ exemplars.clear();
+ exemplars.applyPattern(exemplarString, status);
+ ures_close(res);
+}
+
+
+/*
+ * Builds the output file names for the locale (or rule set) called 'name'.
+ *
+ * Files for the probed platform live in a directory named after the first
+ * selected platform, files for the reference in a directory named after the
+ * reference platform. Any output pointer may be NULL, in which case that name
+ * is not produced. The caller provides buffers that are large enough.
+ *
+ *   tailoringName      <platform>/<name>_raw.<ext>
+ *   tailoringDumpName  <platform>/<name>.dump
+ *   diffName           <platform>/<name>_collation.<ext>
+ *   defaultName        <reference>/<name>_default_raw.<ext>
+ *   defaultDumpName    <reference>/<name>_default.dump
+ */
+void
+getFileNames(const char *name, char *tailoringName, char *tailoringDumpName, char *defaultName, char *defaultDumpName, char *diffName) {
+    const char *platformDir = platforms[gPlatformIndexes[0]].name;
+    const char *referenceDir = platforms[gRefNum].name;
+
+    if(tailoringName) {
+        sprintf(tailoringName, "%s/%s_raw.%s", platformDir, name, outExtension);
+    }
+    if(tailoringDumpName) {
+        sprintf(tailoringDumpName, "%s/%s.dump", platformDir, name);
+    }
+    if(diffName) {
+        sprintf(diffName, "%s/%s_collation.%s", platformDir, name, outExtension);
+    }
+    if(defaultName) {
+        sprintf(defaultName, "%s/%s_default_raw.%s", referenceDir, name, outExtension);
+    }
+    if(defaultDumpName) {
+        sprintf(defaultDumpName, "%s/%s_default.dump", referenceDir, name);
+    }
+}
+
+// Opens the output (or, for --diff, input) files for the locale or rule set
+// called 'name', depending on which of the --platform, --ref and --diff
+// options were given. Does nothing when 'status' already indicates failure;
+// sets U_FILE_ACCESS_ERROR when an output directory cannot be created.
+void
+setFiles(const char *name, UErrorCode &status) {
+ if(U_FAILURE(status)) {
+ return;
+ }
+ int32_t i = 0;
+ char tailoringName[256];
+ char tailoringDumpName[256];
+ char defaultName[256];
+ char defaultDumpName[256];
+ char diffName[256];
+
+ getFileNames(name, tailoringName, tailoringDumpName, defaultName, defaultDumpName, diffName);
+ // raw tailoring output + dump: only when a platform was named and we are not diffing
+ if(options[PLATFORM].doesOccur && !options[DIFF].doesOccur) {
+ if(createDir(platforms[gPlatformIndexes[0]].name) == 0) {
+ tailoringBundle = new UPrinter(tailoringName, "en", "utf-8", NULL, FALSE);
+ // NOTE(review): the fopen() result is not checked here
+ fTailoringDump = fopen(tailoringDumpName, "wb");
+ } else {
+ status = U_FILE_ACCESS_ERROR;
+ return;
+ }
+ }
+
+ // raw reference output + dump: only when a reference was named and we are not diffing
+ if(options[REFERENCE].doesOccur && !options[DIFF].doesOccur) {
+ if(createDir(platforms[gRefNum].name) == 0) {
+ referenceBundle = new UPrinter(defaultName, "en", "utf-8", NULL, FALSE);
+ // NOTE(review): the fopen() result is not checked here
+ fDefaultDump = fopen(defaultDumpName, "wb");
+ } else {
+ status = U_FILE_ACCESS_ERROR;
+ return;
+ }
+ }
+
+ // difference output: needs both sides, or an explicit --diff
+ if((options[PLATFORM].doesOccur && options[REFERENCE].doesOccur) || options[DIFF].doesOccur) {
+ // a createDir() failure is silently ignored here; 'bundle' then stays unset
+ if(createDir(platforms[gPlatformIndexes[0]].name) == 0) {
+ bundle = new UPrinter(diffName, "en", "utf-8", NULL, FALSE);
+ }
+ }
+ // when diffing, the previously written dumps are read back in
+ if(options[DIFF].doesOccur) {
+ fTailoringDump = fopen(tailoringDumpName, "rb");
+ fDefaultDump = fopen(defaultDumpName, "rb");
+ }
+}
+
+
+// File-scope error code used only while constructing the constant sets below
+// (functions in this file take or declare their own 'status').
+UErrorCode status = U_ZERO_ERROR;
+// Constant character sets used when building the repertoire.
+static UnicodeSet UNASSIGNED(UnicodeString("[:Cn:]"), status);
+static UnicodeSet GENERAL_ACCENTS(UnicodeString("[[:block=Combining Diacritical Marks:]-[:Cn:]]"), status);
+//static UnicodeSet ASCII_BASE(UnicodeString("[[:ASCII:]-[:L:]-[:N:]]"), status);
+static UnicodeSet ASCII_BASE(UnicodeString("[[:ASCII:]]"), status);
+static UnicodeSet ALPHABETIC(UnicodeString("[:alphabetic:]"), status);
+//static UnicodeSet CONTROL(UnicodeString("[[:control:][\\u0000-\\u002F]]"), status);
+static UnicodeSet BMP(UnicodeString("[\\u0000-\\uFFFF]"), status);
+
+static UnicodeSet CONTROL(UnicodeString("[:control:]"), status);
+
+// Switches all probed platforms to 'locale': updates gWinLCID, the C
+// library's LC_COLLATE locale and gCol (any previous gCol is closed first).
+// The new collator is opened with normalization turned on. Returns gCol.
+UCollator *
+setLocale(const char* locale, UErrorCode &status)
+{
+ gWinLCID = uloc_getLCID(locale);
+ setlocale(LC_COLLATE, locale);
+
+ if(gCol) {
+ ucol_close(gCol);
+ }
+ gCol = ucol_open(locale, &status);
+ ucol_setAttribute(gCol, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
+ //ucol_setAttribute(col, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, &status);
+ //ucol_setAttribute(col, UCOL_STRENGTH, UCOL_QUATERNARY, &status);
+
+ return gCol;
+}
+
+
+
+// Switches all probed platforms to the reference ordering: LCID of "en",
+// C library locale "en_US.UTF-8" and the ICU "root" collator with
+// normalization turned on (any previous gCol is closed first). Returns gCol.
+UCollator *
+setReference(UErrorCode &status)
+{
+ gWinLCID = uloc_getLCID("en");
+ setlocale(LC_COLLATE, "en_US.UTF-8");
+ if(gCol) {
+ ucol_close(gCol);
+ }
+ gCol = ucol_open("root", &status);
+ ucol_setAttribute(gCol, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
+ return gCol;
+}
+
+// Interactive mode placeholder: reads lines from stdin until end of input
+// and discards them; no commands are implemented.
+void
+processInteractive() {
+ char command[256];
+ while(fgets(command, 256, stdin)) {
+
+ }
+}
+
+// Candidate quadruples of probe characters handed to StrengthProbe;
+// processCollator() tries them in order until the sanity check passes.
+UChar probeChars[][4] = {
+ { 0x0061, 0x0062, 0x00E1, 0x0041 }, // Latin: a, b, a-acute, A
+ { 0x0041, 0x0042, 0x00C1, 0x0061 }, // upper first
+ { 0x006E, 0x006F, 0x00F1, 0x004E }, // Latin: n, o, n-tilde, N
+ { 0x004E, 0x004F, 0x00D1, 0x006E }, // upper first
+ { 0x0433, 0x0493, 0x0491, 0x0413 }, // Cyrillic
+ { 0x0413, 0x0492, 0x0490, 0x0433 }, // upper first
+ { 0x3045, 0x3047, 0x3094, 0x3046 } // Hiragana/Katakana (last resort)
+
+};
+
+/*
+ * Probes the ordering produced by the first selected platform for the
+ * collator 'col' and writes the results to the output bundles.
+ *
+ * Steps:
+ *  - make 'col' the current collator (gCol) and log its tailoring rules;
+ *  - pick the first quadruple from probeChars that passes the probe's sanity
+ *    check (gives up with a log message when none does);
+ *  - build the repertoire (gRepertoire or generateRepertoire(), plus the
+ *    collator's tailored set, minus Han), flatten it and analyse it;
+ *  - with --ref, analyse the same repertoire for the reference platform and
+ *    reduce the result to the differences; with --printref, only the
+ *    reference is analysed;
+ *  - when the locale uses Han, sort the Han block separately.
+ *
+ * The collator in gCol is closed on normal return and gCol is reset to NULL
+ * so that a later setLocale()/setReference() does not close it again.
+ *
+ * col     collator to probe; ownership passes to this function.
+ * status  ICU error code, in/out.
+ */
+void
+processCollator(UCollator *col, UErrorCode &status) {
+    const int32_t ruleCapacity = 16384;
+    uint32_t j = 0;
+    gCol = col;
+    UChar ruleString[ruleCapacity];
+    char myLoc[256];
+
+    int32_t ruleStringLength = ucol_getRulesEx(gCol, UCOL_TAILORING_ONLY, ruleString, ruleCapacity);
+    if(ruleStringLength > ruleCapacity) {
+        // only ruleCapacity units were actually written
+        ruleStringLength = ruleCapacity;
+    }
+    logger->log(UnicodeString(ruleString, ruleStringLength), TRUE);
+    const char *locale = ucol_getLocale(gCol, ULOC_REQUESTED_LOCALE, &status);
+    if(locale == NULL) {
+        locale = "en";
+    }
+    strncpy(myLoc, locale, sizeof(myLoc) - 1);
+    myLoc[sizeof(myLoc) - 1] = 0;
+    UnicodeSet exemplarUSet;
+    UnicodeSet RefRepertoire;
+
+    UnicodeSet tailored;
+
+    // The returned set is owned by the caller: copy it, then release it.
+    USet *tailoredSet = ucol_getTailoredSet(gCol, &status);
+    if(tailoredSet != NULL) {
+        tailored = *((UnicodeSet *)tailoredSet);
+        uset_close(tailoredSet);
+    }
+    tailored.removeAll(CONTROL);
+
+
+    UnicodeString pattern;
+    int sanityResult;
+
+    UnicodeSet hanSet;
+    UBool hanAppears = FALSE;
+
+    debug->log("\nGenerating order for platform: %s\n", platforms[gPlatformIndexes[0]].name);
+    gComparer = platforms[gPlatformIndexes[0]].comparer;
+
+    StrengthProbe probe(platforms[gPlatformIndexes[0]].comparer, platforms[gPlatformIndexes[0]].skgetter, 0x0030, probeChars[0][0], probeChars[0][1], probeChars[0][2], probeChars[0][3]);
+    sanityResult = probe.checkSanity();
+    j = 0;
+    // try the remaining probe character quadruples until one is accepted
+    while(sanityResult && j+1 < sizeof(probeChars)/sizeof(probeChars[0])) {
+        j++;
+        sanityResult = probe.setProbeChars(probeChars[j][0], probeChars[j][1], probeChars[j][2], probeChars[j][3]);
+    }
+    if(sanityResult) {
+        logger->log("Bad choice of probe characters! Sanity returned %i. Exiting\n", sanityResult, sanityResult);
+        return;
+    }
+    logger->log("Probe chars: %C, %C, %C, %C\n", probeChars[j][0], probeChars[j][1], probeChars[j][2], probeChars[j][3]);
+
+    debug->off();
+
+    if(gRepertoire.size()) {
+        exemplarUSet = gRepertoire;
+    } else {
+        generateRepertoire(locale, exemplarUSet, hanAppears, status);
+    }
+    exemplarUSet.addAll(tailored);
+    hanSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_HAN, status);
+    exemplarUSet.removeAll(hanSet);
+
+    logger->log(exemplarUSet.toPattern(pattern, TRUE), TRUE);
+
+    exemplarUSet = flatten(exemplarUSet, status);
+    logger->log(exemplarUSet.toPattern(pattern, TRUE), TRUE);
+
+    if(!options[PRINTREF].doesOccur) {
+
+        logger->log("\n*** Detecting ordering for the locale\n\n");
+
+        debug->on();
+        SortedLines lines(exemplarUSet, gExcludeSet, probe, logger, debug);
+        lines.analyse(status);
+        lines.calculateSortKeys();
+        debug->log("\n*** Final order\n\n");
+        debug->log(lines.toPrettyString(TRUE, TRUE), TRUE);
+        lines.toFile(fTailoringDump, TRUE, status);
+        tailoringBundle->log(lines.toOutput(outputFormat, myLoc, platforms[gPlatformIndexes[0]].name, NULL, TRUE, TRUE, hanAppears), TRUE);
+        //debug->off();
+
+        if(options[REFERENCE].doesOccur) {
+            status = U_ZERO_ERROR;
+            lines.getRepertoire(RefRepertoire);
+            setReference(status);
+
+            logger->log(exemplarUSet.toPattern(pattern, TRUE), TRUE);
+            logger->log(RefRepertoire.toPattern(pattern, TRUE), TRUE);
+
+            StrengthProbe RefProbe(platforms[gRefNum].comparer, platforms[gRefNum].skgetter);
+            logger->log("\n*** Detecting ordering for reference\n\n");
+            SortedLines RefLines(exemplarUSet, gExcludeSet, RefProbe, logger, debug);
+            RefLines.analyse(status);
+            referenceBundle->log(RefLines.toOutput(outputFormat, myLoc, platforms[gRefNum].name, NULL, TRUE, TRUE, FALSE), TRUE);
+            RefLines.toFile(fDefaultDump, TRUE, status);
+
+            lines.reduceDifference(RefLines);
+            logger->log("\n*** Final rules\n\n");
+            logger->log(lines.toPrettyString(TRUE), TRUE);
+            bundle->log(lines.toOutput(outputFormat, myLoc, platforms[gPlatformIndexes[0]].name, platforms[gRefNum].name, TRUE, TRUE, hanAppears), TRUE);
+        }
+    } else {
+        setReference(status);
+        StrengthProbe RefProbe(platforms[gRefNum].comparer, platforms[gRefNum].skgetter);
+        logger->log("\n*** Detecting ordering for reference\n\n");
+        SortedLines RefLines(exemplarUSet, gExcludeSet, RefProbe, logger, debug);
+        RefLines.analyse(status);
+        logger->log(RefLines.toPrettyString(TRUE), TRUE);
+        referenceBundle->log(RefLines.toOutput(outputFormat, myLoc, platforms[gRefNum].name, NULL, TRUE, TRUE, FALSE), TRUE);
+    }
+    if(hanAppears) {
+        // there are Han characters. This is a huge block. The best we can do is to just sort it, compare to empty
+        // and spit it out. Anything else would be a suicide (actually is - kernel just kills you :)
+        logger->log("\n*** Detecting order for Han\n");
+        debug->off();
+        setLocale(gLocale, status);
+        exemplarUSet.clear();
+        exemplarUSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_HAN, status);
+        exemplarUSet = flatten(exemplarUSet, status);
+        SortedLines han(exemplarUSet, gExcludeSet, probe, logger, debug);
+        han.sort(TRUE, TRUE);
+        han.classifyRepertoire();
+        han.getBounds(status);
+        tailoringBundle->log("Han ordering:<br>\n");
+        tailoringBundle->log(han.toOutput(outputFormat, myLoc, platforms[gPlatformIndexes[0]].name, NULL, TRUE, FALSE, FALSE), TRUE);
+        bundle->log(han.toOutput(outputFormat, myLoc, platforms[gPlatformIndexes[0]].name, NULL, TRUE, FALSE, FALSE), TRUE);
+    }
+    ucol_close(gCol);
+    // do not leave a dangling pointer behind for the next setLocale()/setReference()
+    gCol = NULL;
+}
+
+// Runs the whole probe for one locale: switches the platforms to the locale,
+// opens the output files, writes a header line to the output bundles and
+// hands the locale's collator to processCollator(). Returns early when
+// 'status' indicates failure after setLocale()/setFiles().
+void
+processLocale(const char *locale, UErrorCode &status) {
+ setLocale(locale, status);
+ setFiles(locale, status);
+ if(U_FAILURE(status)) {
+ return;
+ }
+
+ debug->log("Locale %s (LCID:%06X, unix:%s)\n", locale, gWinLCID, setlocale(LC_COLLATE, NULL));
+ // NOTE(review): tailoringBundle is used unconditionally here, but setFiles()
+ // only creates it when --platform is given without --diff.
+ tailoringBundle->log("// Ordering for locale %s (LCID:%06X, unix:%s), platform %s reference %s<br>\n",
+ locale, gWinLCID, setlocale(LC_COLLATE, NULL),
+ platforms[gPlatformIndexes[0]].name, platforms[gRefNum].name);
+ if(options[REFERENCE].doesOccur) {
+ referenceBundle->log("// Reference for locale %s (LCID:%06X, unix:%s), platform %s reference %s<br>\n",
+ locale, gWinLCID, setlocale(LC_COLLATE, NULL),
+ platforms[gPlatformIndexes[0]].name, platforms[gRefNum].name);
+ }
+
+
+ processCollator(gCol, status);
+}
+
+
+
+/*
+ * Tells whether the resource bundle of locale 'locName' itself carries a
+ * "CollationElements" resource.
+ *
+ * Returns TRUE only when the bundle opens successfully and the lookup ends
+ * with exactly U_ZERO_ERROR (a warning status does not count); FALSE
+ * otherwise.
+ */
+UBool
+hasCollationElements(const char *locName) {
+    UErrorCode errorCode = U_ZERO_ERROR;
+    UBool found = FALSE;
+
+    UResourceBundle *bundleForLocale = ures_open(NULL, locName, &errorCode);
+
+    if(U_SUCCESS(errorCode)) {
+        // drop any warning left over from opening the bundle
+        errorCode = U_ZERO_ERROR;
+        UResourceBundle *elements = ures_getByKey(bundleForLocale, "CollationElements", NULL, &errorCode);
+        /* do the test - there are real elements */
+        found = (errorCode == U_ZERO_ERROR) ? TRUE : FALSE;
+        ures_close(elements);
+        ures_close(bundleForLocale);
+    }
+    return found;
+}
+
+/*
+ * Program entry point.
+ *
+ * Creates the loggers, parses the command line and then runs one of:
+ *  - interactive mode (--interactive);
+ *  - rules from stdin (--rulesstdin): reads escaped rule text from stdin,
+ *    skipping lines that start with "//", builds a collator from it and
+ *    probes that collator;
+ *  - diff mode (--diff): reads two previously written dumps and outputs the
+ *    difference;
+ *  - locale mode: probes gLocale, or every available locale that has
+ *    collation elements when no locale was given.
+ *
+ * Returns -1 when argument processing failed or no platform was selected,
+ * 0 otherwise.
+ */
+int
+main(int argc,
+     char* argv[])
+{
+    UErrorCode status = U_ZERO_ERROR;
+    logger = new UPrinter(stdout, "en", "latin-1");
+    debug = new UPrinter(stderr, "en", "latin-1");
+
+/*
+    USet *wsp = uprv_openRuleWhiteSpaceSet(&status);
+    uset_add(wsp, 0x0041);
+    uset_remove(wsp, 0x0041);
+    UnicodeString pat;
+    ((UnicodeSet *)wsp)->toPattern(pat, TRUE);
+    pat.setCharAt(pat.length(), 0);
+    escapeString(pat.getBuffer(), pat.length(), log);
+    u_fflush(log);
+*/
+
+    processArgs(argc, argv, status);
+    int32_t i = 0;
+
+
+
+    if(U_FAILURE(status) || gPlatformNo == 0) {
+        return -1;
+    }
+
+    utf8cnv = ucnv_open("utf-8", &status); // we are just doing UTF-8 locales for now.
+    gUCA = ucol_open("root", &status);
+
+    if(options[INTERACTIVE].doesOccur) {
+        processInteractive();
+    } else {
+        if(gRulesStdin) {
+            char buffer[1024];
+            UChar ruleBuffer[16384];
+            UChar *rules = ruleBuffer;
+            int32_t maxRuleLen = 16384;
+            int32_t rLen = 0;
+            // keep the buffer a valid (empty) string even if nothing is read
+            ruleBuffer[0] = 0;
+            while(fgets(buffer, 1024, stdin)) {
+                // skip comment lines, i.e. lines that start with "//"
+                if(buffer[0] == '/' && buffer[1] == '/') {
+                    continue;
+                }
+                rLen = u_unescape(buffer, rules, maxRuleLen);
+                if(rLen >= maxRuleLen) {
+                    // the unescaped text (plus terminator) no longer fits
+                    logger->log("Rules are too long (more than %i UChars)\n", 16384);
+                    status = U_BUFFER_OVERFLOW_ERROR;
+                    break;
+                }
+                rules += rLen;
+                maxRuleLen -= rLen;
+            }
+            UParseError parseError;
+            //escapeString(ruleBuffer, rules-ruleBuffer, log);//
+            debug->log("%U\n", ruleBuffer);
+
+            UCollator *col = ucol_openRules(ruleBuffer, (int32_t)(rules-ruleBuffer), UCOL_DEFAULT, UCOL_DEFAULT, &parseError, &status);
+            if(U_SUCCESS(status)) {
+                setFiles("stdinRules", status);
+                processCollator(col, status);
+            } else {
+                logger->log("Error %s\n", u_errorName(status));
+            }
+        } else if(options[DIFF].doesOccur) {
+            logger->log("Diffing two dumps\n");
+            // must have locale, platform and ref in order to be
+            // able to find dump files.
+            setFiles(gLocale, status);
+
+            if(fTailoringDump && fDefaultDump) {
+                SortedLines tailoring(fTailoringDump, logger, debug, status);
+                logger->log(tailoring.toString(TRUE), TRUE);
+                SortedLines reference(fDefaultDump, logger, debug, status);
+                logger->log(reference.toString(TRUE), TRUE);
+                tailoring.reduceDifference(reference);
+                logger->log("\n*** Final rules\n\n");
+                logger->log(tailoring.toPrettyString(TRUE), TRUE);
+                //result->log(lines.toPrettyString(TRUE), TRUE);
+                bundle->log(tailoring.toOutput(outputFormat, gLocale, platforms[gPlatformIndexes[0]].name, platforms[gRefNum].name, TRUE, TRUE, FALSE), TRUE);
+            }
+
+        } else {
+            if(gLocale) {
+                processLocale(gLocale, status);
+            } else if(gLocaleNo) {
+                for(i = 0; i < gLocaleNo; i++) {
+                    processLocale(gLocales[i], status);
+                }
+            } else { // do the loop through all the locales
+                int32_t noOfLoc = uloc_countAvailable();
+                const char *locName = NULL;
+                for(i = 0; i<noOfLoc; i++) {
+                    status = U_ZERO_ERROR;
+                    locName = uloc_getAvailable(i);
+                    if(hasCollationElements(locName)) {
+                        processLocale(locName, status);
+                    }
+                }
+            }
+        }
+    }
+
+
+    ucol_close(gUCA);
+    ucnv_close(utf8cnv);
+
+    delete logger;
+    delete debug;
+    if(tailoringBundle) {
+        delete tailoringBundle;
+    }
+    if(referenceBundle) {
+        delete referenceBundle;
+    }
+    if(bundle) {
+        delete bundle;
+    }
+    if(fTailoringDump) {
+        fclose(fTailoringDump);
+    }
+    if(fDefaultDump) {
+        fclose(fDefaultDump);
+    }
+    return 0;
+}
+
+
+/*
+ * Builds the text "<property>=<value>" from the long names of property
+ * 'prop' and of its value 'i'.
+ */
+UnicodeString propertyAndValueName(UProperty prop, int32_t i) {
+    const char *propertyName = u_getPropertyName(prop, U_LONG_PROPERTY_NAME);
+    const char *valueName = u_getPropertyValueName(prop, i, U_LONG_PROPERTY_NAME);
+
+    UnicodeString label;
+    label.append(propertyName).append("=").append(valueName);
+    return label;
+}
+
+
+/*
+ * Builds the set of characters to probe for 'locale'.
+ *
+ * The result consists of:
+ *  - all characters of the scripts that uscript_getCode() reports for the
+ *    locale (Latin when it reports none); Han is not added, it only sets
+ *    'hanAppears';
+ *  - every Unicode block that partially overlaps that set, except a few
+ *    compatibility blocks that are skipped by name;
+ *  - the general combining accents and ASCII;
+ * minus control characters. The names of the added scripts and blocks are
+ * written to the debug log and to the output bundles.
+ *
+ * locale      locale to build the repertoire for.
+ * rep         receives the repertoire; previous contents are discarded.
+ * hanAppears  set to TRUE when Han is among the locale's scripts; otherwise
+ *             left untouched.
+ * status      ICU error code, in/out.
+ */
+void generateRepertoire(const char *locale, UnicodeSet &rep, UBool &hanAppears, UErrorCode &status) {
+    debug->log("Getting repertoire for %s\n", locale);
+    tailoringBundle->log("// Scripts in repertoire: ");
+    if(options[REFERENCE].doesOccur) {
+        referenceBundle->log("// Scripts in repertoire: ");
+    }
+    rep.clear();
+    UnicodeSet delta;
+
+    UScriptCode script[256];
+    int32_t i = 0;
+    // now add the scripts for the locale
+    UProperty prop = UCHAR_SCRIPT;
+    int32_t scriptLength = uscript_getCode(locale, script, 256, &status);
+    if(scriptLength) {
+        for (i = 0; i < scriptLength; ++i) {
+            if(script[i] == USCRIPT_HAN) {
+                hanAppears = TRUE;
+                continue;
+            }
+            delta.applyIntPropertyValue(prop, script[i], status);
+            debug->log("Adding ");
+            debug->log(propertyAndValueName(prop, script[i]), TRUE);
+            tailoringBundle->log("// ");
+            tailoringBundle->log(propertyAndValueName(prop, script[i]), TRUE);
+            if(options[REFERENCE].doesOccur) {
+                referenceBundle->log("// ");
+                referenceBundle->log(propertyAndValueName(prop, script[i]), TRUE);
+            }
+            rep.addAll(delta);
+        }
+    } else {
+        delta.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_LATIN, status);
+        rep.addAll(delta);
+    }
+
+    // now see which blocks those overlap, and add
+    prop = UCHAR_BLOCK;
+    int32_t min = u_getIntPropertyMinValue(prop);
+    int32_t max = u_getIntPropertyMaxValue(prop);
+    for (i = min; i <= max; ++i) {
+        // skip values without a name, and certain blocks
+        const char *name = u_getPropertyValueName(prop, i, U_LONG_PROPERTY_NAME);
+        if (name == NULL) continue;
+        if (strcmp(name, "Superscripts_and_Subscripts") == 0
+            || strcmp(name, "Letterlike_Symbols") == 0
+            || strcmp(name, "Alphabetic_Presentation_Forms") == 0
+            || strcmp(name, "Halfwidth_and_Fullwidth_Forms") == 0) continue;
+
+        delta.applyIntPropertyValue(prop, i, status).removeAll(UNASSIGNED);
+        if (!rep.containsSome(delta)) continue;
+        if (rep.containsAll(delta)) continue; // just to see what we are adding
+        debug->log("Adding ");
+        debug->log(propertyAndValueName(prop, i), TRUE);
+        tailoringBundle->log("// ");
+        tailoringBundle->log(propertyAndValueName(prop, i), TRUE);
+        if(options[REFERENCE].doesOccur) {
+            referenceBundle->log("// ");
+            referenceBundle->log(propertyAndValueName(prop, i), TRUE);
+        }
+        rep.addAll(delta);
+    }
+
+    // add ASCII and general accents
+    rep.addAll(GENERAL_ACCENTS).addAll(ASCII_BASE);
+    rep.removeAll(CONTROL);
+    //delta.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_HAN, status);
+    //rep.removeAll(delta);
+
+    // now add the exemplar characters
+    // can't get at them from Java right now
+    tailoringBundle->log("<br>\n");
+    if(options[REFERENCE].doesOccur) {
+        referenceBundle->log("<br>\n");
+    }
+}
+
+/*
+ * Returns the set of all characters that occur in the NFD or NFKD form of
+ * any element (code point or string) of 'source'. Normalization errors are
+ * reported through 'status'.
+ */
+UnicodeSet flatten(const UnicodeSet &source, UErrorCode &status) {
+    UnicodeSet flattened;
+    UnicodeString canonical, compatibility, single;
+
+    for (UnicodeSetIterator iter(source); iter.next(); ) {
+        // pick the text to normalize: the string itself, or the code point
+        // wrapped in a one-character string
+        const UnicodeString *text;
+        if (iter.isString()) {
+            text = &iter.getString();
+        } else {
+            single.setTo(iter.getCodepoint());
+            text = &single;
+        }
+        Normalizer::normalize(*text, UNORM_NFD, 0, canonical, status);
+        Normalizer::normalize(*text, UNORM_NFKD, 0, compatibility, status);
+        flattened.addAll(canonical);
+        flattened.addAll(compatibility);
+    }
+    return flattened;
+}
+
+
+// Experiment on a fixed list of ten "interesting" characters: for each of
+// them, and for every BMP code unit j that the probe does not treat as
+// identical to the empty string, counts the code units k that sort before the
+// interesting character on their own while j+k does not sort before
+// j+interesting. The counts are written to logger.
+// NOTE(review): the nested loops perform on the order of 10 * 65535 * 65535
+// probe comparisons.
+void testWin(StrengthProbe &probe, UErrorCode &status)
+{
+ // 'trailings' is constructed but not used below
+ UnicodeSet trailings(UnicodeString("[\\uFE7D\\uFE7C\\u30FD\\uFF70\\u30FC\\u309D\\u3032\\u3031\\u3005\\u0651]"), status);
+ char intChar[] = "\\uFE7D\\uFE7C\\u30FD\\uFF70\\u30FC\\u309D\\u3032\\u3031\\u3005\\u0651";
+ UChar interesting[256];
+ int32_t intLen = u_unescape(intChar, interesting, 256);
+ UChar i = 0;
+ UChar j = 0, k = 0;
+ int32_t count;
+ Line myCh, combo, trial, inter, kLine;
+ for(i = 0; i < intLen; i++) {
+ inter.setTo(interesting[i]);
+ logger->log(inter.toString(TRUE), TRUE);
+ logger->log("----------------------\n");
+ // j and k stop at 0xFFFE; the last code unit 0xFFFF is never tested
+ for(j = 0; j < 0xFFFF; j++) {
+ myCh.setTo(j);
+ if(probe.distanceFromEmptyString(myCh) == UCOL_IDENTICAL) {
+ continue;
+ }
+ logger->log(myCh.toString(TRUE));
+ combo.setTo(j);
+ combo.append(interesting[i]);
+ count = 0;
+ for(k = 0; k < 0xFFFF; k++) {
+ kLine.setTo(k);
+ trial.setTo(j);
+ trial.append(k);
+ if(probe.compare(kLine, inter) < 0) {
+ if(probe.compare(trial, combo) >= 0) {
+ count++;
+ }
+ }
+ }
+ logger->log("%i %i\n", count, count);
+ }
+ }
+}
+
diff --git a/colprobe/createComparisonTables.pl b/colprobe/createComparisonTables.pl
new file mode 100755
index 0000000..18f6a14
--- /dev/null
+++ b/colprobe/createComparisonTables.pl
@@ -0,0 +1,164 @@
+#! /usr/bin/perl -w
+
+use strict;
+
+
+# The locale to build the comparison page for is the only argument.
+# It is interpolated into a shell command below, so anything that is not a
+# plain locale identifier is rejected up front.
+my $locale = $ARGV[0];
+defined $locale && $locale =~ /^[\w.\@-]+$/
+    or die "usage: $0 <locale>\n";
+
+
+# Display name of the locale, obtained from the external longname tool.
+my $long_name = `/home/weiv/src/icu/source/extra/colprobe/longname $locale`;
+$long_name = "" unless defined $long_name;
+chomp $long_name;
+my $pageTitle = $locale."_collation";
+my $filename = $pageTitle.".html";
+
+# All later "print TABLE" statements write the generated page to this handle.
+open(TABLE, ">", $filename) or die "cannot open $filename for writing: $!\n";
+
+
+print TABLE <<"EndOfTemplate";
+<html>
+
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>$pageTitle</title>
+<style>
+ <!--
+ table { border-spacing: 0; border-collapse: collapse; width: 100%;
+ border: 1px solid black }
+td, th { width: 10%; border-spacing: 0; border-collapse: collapse; color: black;
+ vertical-align: top; border: 1px solid black }
+-->
+ </style>
+</head>
+
+<body bgcolor="#FFFFFF">
+
+<p><b><font color="#FF0000">Collation:</font> $locale ($long_name) <a href="http://oss.software.ibm.com/cgi-bin/icu/lx/en/?_=$locale">Demo</a>,
+
+<a href="../all_diff_xml/comparison_charts.html">Cover
+Page</a>, <a href="../all_diff_xml/index.html">Index</a></b></p>
+<table>
+ <tr>
+EndOfTemplate
+
+my $dirCommon = "common";
+my $refCommon = $dirCommon."/UCARules.txt";
+my $nameCommon = $dirCommon."/".$locale."_collation.html";
+my $colorCommon = "#AD989D";
+
+my $loc = $locale;
+
+# When there is no page for the full locale, fall back to the part before the
+# first underscore. The prefix is taken from a capture group: reading $` after
+# a match that may have failed yields undef or a stale value.
+if(!(-e $nameCommon)) {
+    if($locale =~ /^(.*?)_/) {
+        $loc = $1;
+    }
+    $nameCommon = "$dirCommon/$loc"."_collation.html";
+}
+print TABLE " <th bgcolor=\"$colorCommon\">COMMON (<a href=\"$refCommon\">UCA</a> <a href=\"../$dirCommon/xml/$locale.xml\">xml</a>)</th>\n";
+
+my $dirLinux = "linux";
+my $refLinux = $dirLinux."/".$locale.".utf8_default_raw.html";
+my $rawLinux = $dirLinux."/".$locale.".utf8_raw.html";
+my $defLinux = $dirLinux."/".$locale;
+my $nameLinux = "$dirLinux/$locale".".utf8_collation.html";
+my $colorLinux = "#1191F1";
+
+print TABLE " <th bgcolor=\"$colorLinux\">LINUX (";
+if (!(-e $nameLinux)) {
+#try the variant that has @euro stuck in
+ $nameLinux = "$dirLinux/$locale".'.utf8@euro_collation.html';
+ if(-e $nameLinux) {
+ $refLinux = $dirLinux."/".$locale.'.utf8@euro_default_raw.html';
+ $rawLinux = $dirLinux."/".$locale.'.utf8@euro_raw.html';
+ }
+}
+if (-e $nameLinux) {
+ print TABLE "<a href=\"$rawLinux\">Ordering</a> <a href=\"$defLinux\">Definition</a> <a href=\"$refLinux\">base</a>";
+}
+
+print TABLE " <a href=\"../$dirLinux/xml/$locale.xml\">xml</a>)</th>\n";
+
+my $dirWin = "winxp";
+my $refWin = $dirWin."/".$locale."_default_raw.html";
+my $rawWin = $dirWin."/".$locale."_raw.html";
+my $nameWin = "$dirWin/$locale"."_collation.html";
+my $colorWin = "#98FB98";
+
+print TABLE " <th bgcolor=\"$colorWin\">WINDOWS (";
+if (-e $nameWin) {
+ print TABLE "<a href=\"$rawWin\">Ordering</a> <a href=\"$refWin\">base</a> ";
+}
+print TABLE "<a href=\"../windows/xml/$locale.xml\">xml</a>)</th>\n";
+
+print TABLE " </tr>\n <tr>";
+
+
+readRules($nameCommon, "#AD989D", "Same as the UCA.");
+readRules($nameLinux, "#1191F1", "No data available.");
+readRules($nameWin, "#98FB98", "No data available.");
+
+
+print TABLE <<"EndOfFooter";
+ </tr>
+</table>
+
+</body>
+</html>
+EndOfFooter
+
+
+# readRules($file, $color, $comment)
+# Writes one <td> cell with background $color to TABLE.
+# If $file exists, the cell contains the lines of $file that follow a line
+# matching "Sequence", up to (not including) the next line ending in "}<br>";
+# if no such lines are found the cell says "Same ordering as base".
+# If $file does not exist, the cell contains $comment.
+sub readRules {
+    my ($filename, $color, $comment) = @_;
+    my $noLines = 0;
+    my $printOut = 0;
+
+    if(!(-e $filename)) {
+        print TABLE "<td bgcolor=\"$color\">\n$comment</td>\n";
+        return;
+    }
+
+    # three-argument open: the file name is never interpreted as a mode
+    open(my $file, "<", $filename)
+        or die "cannot open $filename for reading: $!\n";
+    print TABLE "<td bgcolor=\"$color\">\n";
+    while (<$file>) {
+        # a closing brace ends the copied region
+        if (/\}\<br\>$/) {
+            $printOut = 0;
+        }
+        if ($printOut) {
+            print TABLE $_;
+            $noLines++;
+        }
+        # copying starts on the line after the "Sequence" marker
+        if (/Sequence/) {
+            $printOut = 1;
+            print "found sequence\n";
+            $noLines = 0;
+        }
+    }
+    close($file);
+    if (!$noLines) {
+        print TABLE "Same ordering as base\n";
+    }
+    print TABLE "</td>\n";
+}
+
+
+# Tasting of food product
+# 650-574-4551 $50 1 hour
+
+
+# <td bgcolor="#AD989D">1.0-alpha</td>
+# <td bgcolor="#FF6633">1.0</td>
+# <td bgcolor="#FF6633">=</td>
+# <td bgcolor="#FF6633"><span title="006E {LATIN SMALL LETTER N}">&n</span><br>
+# <span title="006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y}"> < ny</span><br>
+
+# <span title="006E 006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y} / 006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y}"> = nny / ny</span><br>
+# <span title="006E 0059 {LATIN SMALL LETTER N} {LATIN CAPITAL LETTER Y}"> <<< nY</span><br>
+# </td>
+# <td bgcolor="#FF6633">=</td>
+# <td bgcolor="#FFFF33">1.2</td>
+
+# <td bgcolor="#98FB98">Windows XP</td>
+# <td bgcolor="#FF6633">=</td>
+# <td bgcolor="#FF6633">=</td>
diff --git a/colprobe/doComparisonTable.pl b/colprobe/doComparisonTable.pl
new file mode 100755
index 0000000..7a02ef3
--- /dev/null
+++ b/colprobe/doComparisonTable.pl
@@ -0,0 +1,209 @@
+#! /usr/bin/perl -w
+
+use strict;
+use IO::File;
+
+
+# The locale to build the comparison page for is the only argument.
+# It is interpolated into a shell command below, so anything that is not a
+# plain locale identifier is rejected up front.
+my $locale = $ARGV[0];
+defined $locale && $locale =~ /^[\w.\@-]+$/
+    or die "usage: $0 <locale>\n";
+
+
+# Display name of the locale, obtained from the external longname tool.
+my $long_name = `/home/weiv/src/icu/source/extra/colprobe/longname $locale`;
+$long_name = "" unless defined $long_name;
+chomp $long_name;
+print "Long name is $long_name\n";
+my $pageTitle = $locale." collation";
+my $filename = $locale.".html";
+
+# All later "print TABLE" statements write the generated page to this handle.
+open(TABLE, ">", $filename) or die "cannot open $filename for writing: $!\n";
+
+
+print TABLE <<"EndOfTemplate";
+<html>
+
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>$pageTitle</title>
+<style>
+ <!--
+ table { border-spacing: 0; border-collapse: collapse; width: 100%;
+ border: 1px solid black }
+td, th { width: 10%; border-spacing: 0; border-collapse: collapse; color: black;
+ vertical-align: top; border: 1px solid black }
+-->
+ </style>
+</head>
+
+<body bgcolor="#FFFFFF">
+
+<p><b><font color="#FF0000">Collation:</font> $locale ($long_name) <a href="http://oss.software.ibm.com/cgi-bin/icu/lx/en/?_=$locale">Demo</a>,
+
+<a href="../../comparison_charts.html">Cover
+Page</a>, <a href="../main/index.html">Locale Diffs Index</a>, <a href="index.html">Collation Diffs Index</a></b></p>
+<table>
+ <tr>
+EndOfTemplate
+
+my $dirCommon = "icucollations";
+my $refCommon = $dirCommon."/UCARules.txt";
+my $nameCommon = $dirCommon."/".$locale."_collation.html";
+my $colorCommon = "#AD989D";
+
+my $loc = $locale;
+
+# When there is no page for the full locale, fall back to the part before the
+# first underscore. The prefix is taken from a capture group: reading $` after
+# a match that may have failed yields undef or a stale value.
+if(!(-e $nameCommon)) {
+    if($locale =~ /^(.*?)_/) {
+        $loc = $1;
+    }
+    $nameCommon = "$dirCommon/$loc"."_collation.html";
+}
+
+print "Common is $nameCommon\n";
+
+print TABLE " <th bgcolor=\"$colorCommon\">COMMON (";
+if(-e $nameCommon) {
+ print TABLE "<a href=\"../../common/collation/$loc.xml\">xml</a> ";
+}
+print TABLE "<a href=\"../../common/collation/root.xml\">UCA</a>)</th>\n";
+
+my $dirLinux = "linuxcollations";
+my $refLinux = $dirLinux."/".$locale.".utf8_default_raw.html";
+my $rawLinux = $dirLinux."/".$locale.".utf8_raw.html";
+my $defLinux = $dirLinux."/".$locale;
+my $nameLinux = "$dirLinux/$locale"."_collation.html";
+my $colorLinux = "#1191F1";
+
+print TABLE " <th bgcolor=\"$colorLinux\">LINUX";
+if (!(-e $nameLinux)) {
+#try the variant that has @euro stuck in
+ $nameLinux = "$dirLinux/$locale".'.utf8@euro_collation.html';
+ if(-e $nameLinux) {
+ $refLinux = $dirLinux."/".$locale.'.utf8@euro_default_raw.html';
+ $rawLinux = $dirLinux."/".$locale.'.utf8@euro_raw.html';
+ }
+}
+if (-e $nameLinux) {
+ print TABLE " (<a href=\"../../linux/collation/$locale.xml\">xml</a>";
+ my $linuxBase = &getBaseLocale("$dirLinux/base", $locale);
+ if($linuxBase ne "") {
+ print TABLE " <a href=\"../../linux/collation/$linuxBase.xml\">Base ($linuxBase)</a>";
+ }
+ print TABLE ")";
+}
+print TABLE "</th>\n";
+
+
+my $dirWin = "w2kcollations";
+my $refWin = $dirWin."/".$locale."_default_raw.html";
+my $rawWin = $dirWin."/".$locale."_raw.html";
+my $nameWin = "$dirWin/$locale"."_collation.html";
+my $colorWin = "#98FB98";
+$loc = $locale;
+#try fallback for windows
+print TABLE " <th bgcolor=\"$colorWin\">WINDOWS";
+if(!(-e $nameWin)) {
+    # Use the part before the first underscore. A capture group is used
+    # because $` is undef or stale when the locale has no underscore.
+    if($locale =~ /^(.*?)_/) {
+        $loc = $1;
+    }
+    $nameWin = "$dirWin/$loc"."_collation.html";
+}
+
+print "Windows loc is $loc\n";
+
+if (-e $nameWin) {
+ print TABLE " (<a href=\"../../windows/collation/$loc.xml\">xml</a>";
+ my $winBase = &getBaseLocale("$dirWin/base", $locale);
+ if($winBase ne "") {
+ print TABLE "<a href=\"../../windows/collation/$winBase.xml\">base ($winBase)</a>";
+ }
+ print TABLE ")";
+}
+print TABLE "</th>\n";
+print TABLE " </tr>\n <tr>";
+
+
+readRules($nameCommon, "#AD989D", "Same as the UCA.");
+readRules($nameLinux, "#1191F1", "No data available.");
+readRules($nameWin, "#98FB98", "No data available.");
+
+
+print TABLE <<"EndOfFooter";
+ </tr>
+</table>
+
+</body>
+</html>
+EndOfFooter
+
+
+# readRules($file, $color, $comment)
+# Writes one <td> cell with background $color to TABLE.
+# If $file exists, the cell contains the lines of $file that follow a line
+# matching "Sequence", up to (not including) the next line ending in "}<br>",
+# leaving out empty lines and lines ending in " <br>"; if nothing is copied
+# the cell says "Same ordering as base".
+# If $file does not exist, the cell contains $comment.
+sub readRules {
+    my ($filename, $color, $comment) = @_;
+    my $noLines = 0;
+    my $printOut = 0;
+
+    if(!(-e $filename)) {
+        print TABLE "<td bgcolor=\"$color\">\n$comment</td>\n";
+        return;
+    }
+
+    # three-argument open: the file name is never interpreted as a mode
+    open(my $file, "<", $filename)
+        or die "cannot open $filename for reading: $!\n";
+    print TABLE "<td bgcolor=\"$color\">\n";
+    while (<$file>) {
+        # a closing brace ends the copied region
+        if (/\}\<br\>$/) {
+            $printOut = 0;
+        }
+        if ($printOut) {
+            if(!/^$/ && !/ <br>$/) {
+                print TABLE $_;
+                $noLines++;
+            }
+        }
+        # copying starts on the line after the "Sequence" marker
+        if (/Sequence/) {
+            $printOut = 1;
+            print "found sequence\n";
+            $noLines = 0;
+        }
+    }
+    close($file);
+    if (!$noLines) {
+        print TABLE "Same ordering as base\n";
+    }
+    print TABLE "</td>\n";
+}
+
+# getBaseLocale($basefile, $locale)
+# Scans $basefile for the first line containing "<$locale>" and returns the
+# text of the second ">"-separated field with its leading whitespace and "<"
+# removed. Returns "" when no line matches, so callers can compare the result
+# with "" without triggering an undef warning.
+# Dies when $basefile cannot be opened.
+# (The former empty prototype "()" was dropped: the sub takes two arguments.)
+sub getBaseLocale {
+    my ($basefile, $locale) = @_;
+    my $baseFH = IO::File->new($basefile,"r")
+        or die "could not open the file $basefile for reading: $! \n";
+    while(defined(my $line = <$baseFH>)) {
+        # \Q..\E: the locale is matched literally, not as a pattern
+        if($line =~ /<\Q$locale\E>/) {
+            my ($loc, $bse) = split(/>/, $line);
+            $bse = "" unless defined $bse;
+            $bse =~ s/^\s+<//;
+            return $bse;
+        }
+    }
+    return "";
+}
+
+
+# Tasting of food product
+# 650-574-4551 $50 1 hour
+
+
+# <td bgcolor="#AD989D">1.0-alpha</td>
+# <td bgcolor="#FF6633">1.0</td>
+# <td bgcolor="#FF6633">=</td>
+# <td bgcolor="#FF6633"><span title="006E {LATIN SMALL LETTER N}">&n</span><br>
+# <span title="006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y}"> < ny</span><br>
+
+# <span title="006E 006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y} / 006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y}"> = nny / ny</span><br>
+# <span title="006E 0059 {LATIN SMALL LETTER N} {LATIN CAPITAL LETTER Y}"> <<< nY</span><br>
+# </td>
+# <td bgcolor="#FF6633">=</td>
+# <td bgcolor="#FFFF33">1.2</td>
+
+# <td bgcolor="#98FB98">Windows XP</td>
+# <td bgcolor="#FF6633">=</td>
+# <td bgcolor="#FF6633">=</td>
diff --git a/colprobe/extractCollationData.pl b/colprobe/extractCollationData.pl
new file mode 100755
index 0000000..f7079be
--- /dev/null
+++ b/colprobe/extractCollationData.pl
@@ -0,0 +1,246 @@
+#!/usr/bin/perl
+
+use strict;
+use Unicode::UCD 'charinfo';
+use Unicode::Normalize;
+use utf8;
+use open ':utf8';
+
+my $printout = 0;
+my $braces = 0;
+my $colls = 0;
+my $aliased = 0;
+my $newName = "";
+my $filename;
+my $suffix;
+my $locale;
+
+# Main loop: every argument is the path of a locale source file. The file is
+# run through uconv, scanned line by line, and each "Sequence" block found
+# after a "collations" line is written to "<locale>[_<suffix>]_collation.html".
+# When the collations are aliased, $arg is rewritten to the alias target and
+# the same iteration is restarted with "redo".
+NEW_FILE:
+foreach my $arg (@ARGV) {
+    # $newName is empty on a fresh argument; after a redo it holds the alias
+    # target, in which case $locale keeps the name of the original locale.
+    if($newName =~ /^$/) {
+        $locale = $arg;
+        $locale =~ s#^.*/##g;
+        $locale =~ s/\.txt//;
+    } else {
+        $newName = "";
+    }
+    my $command = "/home/weiv/build/current/bin/uconv -x hex-any/Java -f utf8 -t utf8 $arg";
+    print $command."\n";
+    my @bundle = `$command`;
+    foreach $_ (@bundle) {
+        #while(<>) {
+        #print $ARGV if eof;
+        # skip lines that start with a // comment
+        if(/^\/\//) {
+            next;
+        }
+        if(/collations/) {
+            print "found Collations\n";
+            $colls = 1;
+            if(/alias/) {
+                print "collations are aliased\n";
+                $aliased = 1;
+            }
+        }
+        # While in an alias block: pick up the quoted target name and, at the
+        # closing brace, restart this iteration on the target file.
+        if($aliased) {
+            print "processing aliased data: $_\n";
+            if(/\{/) {
+                print "Braces opened\n";
+                $braces = 1;
+            }
+            if($braces && /\"(.*)\"/) {
+                $newName = $1;
+                print "Aliasing to $newName\n";
+            }
+            if($braces && /\}/) {
+                $braces = 0;
+                print "Braces closed\n";
+                $aliased = 0;
+                # NOTE(review): $filename may still be unset here
+                print "Switching from $filename to $newName\n";
+                $arg =~ s/$locale\.txt$/$newName\.txt/;
+                print "$arg\n";
+                redo NEW_FILE;
+            }
+
+        }
+        # Remember the collation type; "standard" gets no suffix.
+        # NOTE(review): $suffix is never reset between files - confirm.
+        if(/standard|phonebook|traditional|pinyin|stroke|direct/ && $colls) {
+            print "found $& collation\n";
+            $suffix = "_".uc($&);
+            if(/standard/) {
+                $suffix = "";
+            }
+        }
+        # A "Sequence" line starts a new output file.
+        if(/Sequence/ && $colls) {
+            #binmode ARGV, ":utf8";
+            $printout = 1;
+            #$filename = $ARGV;
+            $filename = $locale;
+            # NOTE(review): $suffix already begins with "_", so the name gets
+            # two underscores (e.g. "de__PHONEBOOK") - confirm this is wanted.
+            if($suffix) {
+                $filename .= "_".$suffix;
+            }
+            $filename .= "_collation.html";
+            print "filename is $filename\n";
+            #open(OUT, ">:utf8", "$filename");
+            open(OUT, ">$filename");
+            printHeading($arg);
+            #next;
+        }
+        # While printing, $braces tracks nesting depth; the output file is
+        # finished when the depth returns to zero.
+        my $line = $_;
+        if($line =~ /\{/ && $printout) {
+            $braces++;
+        }
+        if($printout) {
+            print OUT processLine($line);
+            print OUT "\n";
+        }
+        if( $line =~ /\}/ && $printout) {
+            $braces--;
+            if($braces == 0) {
+                $printout = 0;
+                printFooting();
+                close(OUT);
+            }
+        }
+    }
+}
+
+# processLine($line)
+# Turns one line of a collation rule string into HTML and returns it.
+# The line is stripped of comments, the "Sequence" keyword, the outer
+# braces and quotes, then broken into one piece per option, reset (&),
+# strength relation (< << <<< <<<<) or equality (=). Each piece becomes
+# one "<br>"-terminated line; resets and relations are wrapped in a <span>
+# whose title lists the code points and character names of the piece.
+#
+# Fixes relative to the earlier version: works on a private copy instead of
+# overwriting the caller's $_, uses $1 instead of \1 in replacements, declares
+# each variable once, and really escapes & < > and spaces as HTML entities
+# (the substitutions used to replace each character with itself).
+sub processLine {
+    my $line = shift;
+    my $text = $line;
+    my $j = 0;
+# remove comments
+    $text =~ s#//.*$##g;
+# remove "Sequence" if present
+    $text =~ s/Sequence\s*//;
+# remove leading brace if present
+    $text =~ s/^\s*\{//;
+# remove trailing brace if present
+    $text =~ s/\}\s*$//;
+# remove trailing quote
+    $text =~ s/"\s*$//;
+#remove lead quote
+    $text =~ s/^\s*"//;
+#separate options
+    $text =~ s/(\[.*\])/\n$1/g;
+#separate resets
+    $text =~ s/\s*\&\s*/\n\& /g;
+#separate strengths and insert spaces
+    $text =~ s/\s*(<{1,4})\s*/\n$1 /g;
+#separate equals and insert spaces
+    $text =~ s/\s*=\s*/\n= /g;
+
+# break into individual reset/strength/setting lines
+    my @lines = split(/\n/, $text);
+
+    my $name;
+    my $spanEnd = "";
+    my $result = "";
+    my $names = "";
+    my $codes = "";
+    my $lrm = "";
+
+    foreach $line (@lines) {
+        # skip empty lines
+        if($line =~ /^$/) {
+            next;
+        }
+        $spanEnd = "";
+        $name = "";
+        $lrm = "";
+        $line = NFC($line);
+        # for resets and strengths we will get name for elements
+        if($line =~ /<{1,4} |= |& \[.*\]|& /) {
+            $name = "<span title=\"";
+            $names = "";
+            $codes = "";
+            my $start = $&;
+            my $rest = $';
+            for ($j = 0; $j < length($rest); $j++) {
+                my $char = substr($rest, $j, 1);
+                my $charVal = ord($char);
+                # some of elements are part of the syntax, so they are
+                # entered without translation to the name
+                if($charVal == 0x002F || $charVal == 0x007C) {
+                    $name .= $codes.$names." $char ";
+                    $codes = "";
+                    $names = "";
+                } elsif($charVal == 0x0027) { #quote requires more processing
+                    #$name .= "'";
+                } else {
+                    my $charinfo = charinfo($charVal);
+                    $codes .= $charinfo->{'code'}." ";
+                    $names .= "{".$charinfo->{'name'}."} ";
+                    # right-to-left text gets a left-to-right mark in front,
+                    # written as an entity so it is visible in this source
+                    if($charinfo->{'bidi'} eq "R" || $charinfo->{'bidi'} eq "AL") {
+                        $lrm = "&lrm;";
+                    }
+                    #$name .= $charinfo->{'code'}." {".$charinfo->{'name'}."} ";
+                }
+            }
+            $name .= $codes.$names."\" >";
+            $spanEnd = "</span>";
+        }
+        #print $name."\n";
+        # indent relations; the space is turned into &nbsp; below
+        if($line =~ /^<<<</) {
+            $line = " $line";
+        } elsif($line =~ /^<<</) {
+            $line = " $line";
+        } elsif($line =~ /^<</) {
+            $line = " $line";
+        } elsif($line =~ /^</) {
+            $line = " $line";
+        } elsif($line =~ /^=/) {
+            $line = " $line";
+        }
+        # insert spaces around vertical bars (fix prefixes)
+
+        # insert spaces around slashes (fix expansions)
+        $line =~ s#/# / #g;
+        # replace & (must come first: the other entities contain "&")
+        $line =~ s/\&/&amp;/g;
+        # replace spaces
+        $line =~ s/ /&nbsp;/g;
+        # replace <
+        $line =~ s/</&lt;/g;
+        # replace >
+        $line =~ s/>/&gt;/g;
+
+        $result .= $name.$lrm.$line.$spanEnd."<br>\n";
+    }
+
+    return $result;
+
+}
+
+# printHeading($filename)
+# Writes the start of the generated page to OUT: an HTML head declaring
+# UTF-8, two descriptive lines, and the opening lines of the bundle
+# ("<name> {", "CollationElements {", "Sequence {"). The first ".txt" is
+# removed from $filename before it is used as the bundle name.
+# NOTE(review): printFooting emits "</pre>" but no "<pre>" is opened here.
+sub printHeading {
+my $filename = shift;
+$filename =~ s/\.txt//;
+# the "#" lines inside the here-document are output text, not Perl comments
+print OUT <<"EndOfHeading";
+<html>
+<head>
+<meta http-equiv="content-type" content="text/html; charset=utf-8">
+</head>
+# Collation data resource bundle generated for locale: $filename<br>
+# For platform icu reference platform UCA<br><br>
+
+
+$filename {<br>
+ CollationElements {<br>
+ Sequence {<br>
+EndOfHeading
+}
+
+# printFooting()
+# Writes the end of the generated page to OUT: the three closing braces that
+# match the lines opened by printHeading, followed by the closing HTML tags.
+sub printFooting {
+print OUT <<"EndOfFooting";
+ }<br>
+ }<br>
+}<br>
+
+</pre>
+</html>
+EndOfFooting
+}
diff --git a/colprobe/gcd2.pl b/colprobe/gcd2.pl
new file mode 100755
index 0000000..c8e5f87
--- /dev/null
+++ b/colprobe/gcd2.pl
@@ -0,0 +1,24 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+#my $localeMinusA = `locale -a`;
+my $localeMinusA = `cat ~/src/icu/source/extra/colprobe/locale.txt`;
+my @locales = split(/\n/, $localeMinusA);
+my $locale;
+my $command;
+
+# Platform name (first argument). It names the log directory
+# "<platform>logs2", the data directory "<platform>" and is passed to colprobe.
+my $platform = $ARGV[0];
+defined $platform or die "usage: $0 <platform>\n";
+
+mkdir $platform."logs2";
+mkdir $platform;
+
+# For every locale: run colprobe in diff mode with its output captured in a
+# per-locale log, then copy the system locale definition next to the results.
+foreach $locale (@locales) {
+    $command = "~/src/icu/source/extra/colprobe/colprobe --platform $platform --ref $platform --diff $locale >$platform"."logs2/$locale"."Log.txt 2>&1";
+    # keep only the part before the first "." (no longer clobbers $_)
+    ($locale) = split(/\./, $locale);
+    $command .= "; cp /usr/share/i18n/locales/$locale $platform/";
+    print "$command\n";
+    `$command`;
+    #chdir "..";
+
+}
diff --git a/colprobe/genCollData.pl b/colprobe/genCollData.pl
new file mode 100755
index 0000000..2f46184
--- /dev/null
+++ b/colprobe/genCollData.pl
@@ -0,0 +1,23 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+my $localeMinusA = `locale -a`;
+my @locales = split(/\n/, $localeMinusA);
+my $locale;
+my $command;
+
+# Platform name (first argument). It names the log directory
+# "<platform>logs" and the data directory "<platform>".
+# NOTE(review): the colprobe command below hard-codes "linux" for --platform
+# and --ref instead of using $platform - confirm that this is intended.
+my $platform = $ARGV[0];
+defined $platform or die "usage: $0 <platform>\n";
+
+mkdir $platform."logs";
+mkdir $platform;
+
+# For every locale reported by "locale -a": run colprobe with its output
+# captured in a per-locale log, then copy the system locale definition.
+foreach $locale (@locales) {
+    $command = "~/src/icu/source/extra/colprobe/colprobe --output resb --platform linux --ref linux $locale >$platform"."logs/$locale"."Log.txt 2>&1";
+    # keep only the part before the first "." (no longer clobbers $_)
+    ($locale) = split(/\./, $locale);
+    $command .= "; cp /usr/share/i18n/locales/$locale $platform/";
+    print "$command\n";
+    `$command`;
+    #chdir "..";
+
+}
diff --git a/colprobe/line.cpp b/colprobe/line.cpp
new file mode 100755
index 0000000..d8829d9
--- /dev/null
+++ b/colprobe/line.cpp
@@ -0,0 +1,701 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File line.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 03/18/2003 weiv Creation.
+*******************************************************************************
+*/
+
+#include "line.h"
+#include <stdio.h>
+
+UnicodeSet * Line::needsQuoting = NULL;
+
+// Puts all bookkeeping members into their "empty line" state.
+// The name and expansionString buffers themselves are not touched.
+void
+Line::init()
+{
+    // lengths and indices
+    len = 0;
+    expLen = 0;
+    expIndex = 0;
+
+    // strengths: nothing has been measured yet
+    strength = strengthFromEmpty = cumulativeStrength = expStrength = UCOL_OFF;
+
+    // list / tree links
+    previous = NULL;
+    next = NULL;
+    left = NULL;
+    right = NULL;
+
+    // flags
+    isContraction = isExpansion = isRemoved = isReset = FALSE;
+
+    // combining classes and sort key
+    firstCC = lastCC = 0;
+    sortKey = NULL;
+}
+
+// Default constructor: an empty line with zeroed text buffers.
+Line::Line()
+{
+    memset(name, 0, 25*sizeof(UChar));
+    memset(expansionString, 0, 25*sizeof(UChar));
+    init();
+}
+
+// Builds a line from the first `len` UChars of `name`.
+// Both buffers are zeroed first so that the stored name is always
+// NUL-terminated (u_memcpy does not add a terminator, and other members such
+// as operator= rely on one). An empty or negative length yields an empty
+// line instead of indexing name[-1].
+Line::Line(const UChar* name, int32_t len)
+{
+    init();
+    memset(this->name, 0, 25*sizeof(UChar));
+    memset(expansionString, 0, 25*sizeof(UChar));
+    if(len <= 0) {
+        return;     // init() already set len and the combining classes to 0
+    }
+    this->len = len;
+    u_memcpy(this->name, name, len);
+    // combining classes of the first and last code point
+    UChar32 c;
+    U16_GET(name, 0, 0, len, c);
+    firstCC = u_getCombiningClass(c);
+    U16_GET(name, 0, len-1, len, c);
+    lastCC = u_getCombiningClass(c);
+}
+
+// Builds a line that consists of the single code unit `name`.
+Line::Line(const UChar name)
+{
+    init();
+    memset(expansionString, 0, 25*sizeof(UChar));
+    this->name[0] = name;
+    this->name[1] = 0;
+    len = 1;
+    // one character: first and last combining class coincide
+    lastCC = firstCC = u_getCombiningClass(name);
+}
+
+// Builds a line whose name is a copy of `string`.
+// expansionString is zeroed explicitly: neither init() nor setTo() touches
+// it, and operator= / equals() later read it as a NUL-terminated string.
+Line::Line(const UnicodeString &string)
+{
+    init();
+    memset(expansionString, 0, 25*sizeof(UChar));
+    setTo(string);
+}
+
+// Builds a line from the text form handled by initFromString().
+// init() and the memsets run first so that every member the parser does not
+// assign (sortKey, isExpansion when an expansion is present, ...) has a
+// defined value; previously those members were left uninitialised.
+Line::Line(const char *buff, int32_t buffLen, UErrorCode &status) :
+previous(NULL),
+next(NULL),
+left(NULL),
+right(NULL)
+{
+    init();
+    memset(name, 0, 25*sizeof(UChar));
+    memset(expansionString, 0, 25*sizeof(UChar));
+    initFromString(buff, buffLen, status);
+}
+
+// Copy constructor: starts unlinked, then takes over the contents of `other`
+// through the assignment operator.
+Line::Line(const Line &other) :
+    previous(NULL),
+    next(NULL),
+    left(NULL),
+    right(NULL)
+{
+    operator=(other);
+}
+
+// Copies text, strengths, flags, sort key pointer and the left/right links
+// from `other`. The previous/next links of this object are kept.
+// Self-assignment returns immediately: u_strcpy must not be given
+// overlapping source and destination buffers.
+Line &
+Line::operator=(const Line &other) {
+    if(this == &other) {
+        return *this;
+    }
+    len = other.len;
+    expLen = other.expLen;
+    strength = other.strength;
+    strengthFromEmpty = other.strengthFromEmpty;
+    cumulativeStrength = other.cumulativeStrength;
+    expStrength = other.expStrength;
+    isContraction = other.isContraction;
+    isExpansion = other.isExpansion;
+    isRemoved = other.isRemoved;
+    isReset = other.isReset;
+    expIndex = other.expIndex;
+    firstCC = other.firstCC;
+    lastCC = other.lastCC;
+    u_strcpy(name, other.name);
+    u_strcpy(expansionString, other.expansionString);
+    // the sort key pointer is copied as is, not the bytes it points to
+    sortKey = other.sortKey;
+    left = other.left;
+    right = other.right;
+    return *this;
+}
+
+// Two lines are equal when they are the same object or have the same length
+// and the same name; strengths and expansions are not considered.
+UBool
+Line::operator==(const Line &other) const {
+    if(this == &other) {
+        return TRUE;
+    }
+    if(len == other.len && u_strcmp(name, other.name) == 0) {
+        return TRUE;
+    }
+    return FALSE;
+}
+
+// Stricter comparison than operator==: besides length and name, the
+// strength and the expansion (length and text) have to match as well.
+UBool
+Line::equals(const Line &other) const {
+    if(this == &other) {
+        return TRUE;
+    }
+    const UBool same =
+        len == other.len &&
+        u_strcmp(name, other.name) == 0 &&
+        strength == other.strength &&
+        expLen == other.expLen &&
+        u_strcmp(expansionString, other.expansionString) == 0;
+    return same;
+}
+
+// Negation of operator==.
+UBool
+Line::operator!=(const Line &other) const {
+    return !operator==(other);
+}
+
+
+// Nothing to release: the destructor frees neither sortKey nor linked lines.
+Line::~Line()
+{
+}
+
+// Assigns the first `size` elements of `src` to `dest`, element by element,
+// using Line::operator=.
+void
+Line::copyArray(Line *dest, const Line *src, int32_t size) {
+    for(int32_t idx = 0; idx < size; ++idx) {
+        dest[idx] = src[idx];
+    }
+}
+
+// Replaces the name with the first `len` UChars of `name` and recomputes the
+// combining classes of its first and last code point.
+// The stored name is now NUL-terminated (u_memcpy does not add a terminator,
+// but operator=, setToConcat and others treat name as a C string), and an
+// empty or negative length produces an empty name instead of reading name[-1].
+void
+Line::setName(const UChar* name, int32_t len) {
+    if(len <= 0) {
+        this->len = 0;
+        this->name[0] = 0;
+        firstCC = 0;
+        lastCC = 0;
+        return;
+    }
+    this->len = len;
+    u_memcpy(this->name, name, len);
+    this->name[len] = 0;
+    UChar32 c;
+    U16_GET(name, 0, 0, len, c);
+    firstCC = u_getCombiningClass(c);
+    U16_GET(name, 0, len-1, len, c);
+    lastCC = u_getCombiningClass(c);
+}
+
+// Sets this line to the name of `first` followed by the name of `second`.
+// The combining classes are taken from the outer ends of the two parts.
+void
+Line::setToConcat(const Line *first, const Line *second) {
+    firstCC = first->firstCC;
+    lastCC = second->lastCC;
+    len = first->len + second->len;
+    u_strcpy(name, first->name);
+    u_strcat(name, second->name);
+}
+
+// Produces a readable description of `string`: first the hexadecimal value
+// of every code point (4 digits in the BMP, 6 above), each followed by a
+// space, then the extended character name of every code point in braces.
+UnicodeString
+Line::stringToName(UChar *string, int32_t len) {
+    UErrorCode status = U_ZERO_ERROR;
+    UnicodeString result;
+    char buffer[256];
+    UChar32 c;
+
+    // pass 1: code point values
+    for(int32_t pos = 0; pos < len; ) {
+        U16_NEXT(string, pos, len, c);
+        if(c >= 0x10000) {
+            sprintf(buffer, "%06X ", c);
+        } else {
+            sprintf(buffer, "%04X ", c);
+        }
+        result.append(buffer);
+    }
+
+    // pass 2: character names
+    for(int32_t pos = 0; pos < len; ) {
+        U16_NEXT(string, pos, len, c);
+        u_charName(c, U_EXTENDED_CHAR_NAME, buffer, 256, &status);
+        result.append("{");
+        result.append(buffer);
+        result.append("} ");
+    }
+    return result;
+}
+
+// Formats this line as one quoted rule-string line followed by a "//"
+// comment that holds the code points and names of its characters.
+// The element is written in NFC and is wrapped in apostrophes when the name
+// or its NFC form contains a character from the needsQuoting set.
+UnicodeString
+Line::toBundleString()
+{
+
+    UnicodeString result;
+    UErrorCode status = U_ZERO_ERROR;
+    // the quoting set is shared by all lines and built on first use
+    if(!needsQuoting) {
+        needsQuoting = new UnicodeSet("[[:whitespace:][:c:][:z:][[:ascii:]-[a-zA-Z0-9]]]", status);
+    }
+    UChar NFC[50];
+    int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status);
+    result.append("\"");
+    // relation: "&" for a reset, otherwise the strength operator
+    if(isReset) {
+        result.append("&");
+    } else {
+        result.append(strengthToString(strength, FALSE, FALSE));
+    }
+    UBool quote = needsQuoting->containsSome(name) || needsQuoting->containsSome(NFC);
+    if(quote) {
+        result.append("'");
+    }
+    // a leading double quote is written as an escape so that it does not
+    // end the surrounding string; NOTE(review): the rest of NFC is dropped
+    // in that case - confirm this is only hit for single-character lines
+    if(NFC[0] == 0x22) {
+        result.append("\\u0022");
+    } else {
+        result.append(NFC, NFCLen);
+    }
+    // no closing apostrophe when the element itself starts with one
+    if(quote && NFC[0] != 0x0027) {
+        result.append("'");
+    }
+    // expansion part, quoted under the same rule
+    if(expLen && !isReset) {
+        quote = needsQuoting->containsSome(expansionString);
+        result.append(" / ");
+        if(quote) {
+            result.append("'");
+        }
+        result.append(expansionString);
+        if(quote) {
+            result.append("'");
+        }
+    }
+    result.append("\" //");
+
+    // trailing comment: code points and character names
+    result.append(stringToName(NFC, NFCLen));
+    if(expLen && !isReset) {
+        result.append(" / ");
+        result.append(stringToName(expansionString, expLen));
+    }
+    result.append("\n");
+    return result;
+}
+
+// Formats this line as one HTML line: a <span> whose title holds the code
+// points and names of the element (and of its expansion, if any) and whose
+// text is the relation followed by the NFC form of the element.
+// The reset sign is written as the entity "&amp;": a bare "&" is not valid
+// character data in HTML.
+UnicodeString
+Line::toHTMLString()
+{
+    UnicodeString result;
+    UErrorCode status = U_ZERO_ERROR;
+    UChar NFC[50];
+    int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status);
+    // tooltip
+    result.append("<span title=\"");
+    result.append(stringToName(NFC, NFCLen));
+    if(expLen && !isReset) {
+        result.append(" / ");
+        result.append(stringToName(expansionString, expLen));
+    }
+    result.append("\">");
+    // relation
+    if(isReset) {
+        result.append("&amp;");
+    } else {
+        result.append(strengthToString(strength, FALSE, TRUE));
+    }
+    // element and optional expansion
+    result.append(NFC, NFCLen);
+    if(expLen && !isReset) {
+        result.append(" / ");
+        result.append(expansionString);
+    }
+    result.append("</span><br>\n");
+    return result;
+}
+
+// Returns the text of the line.
+// pretty == FALSE: the raw name, plus "/" and the expansion if there is one.
+// pretty == TRUE:  the NFC form of the name (plus "/" and the expansion),
+//                  then " # " and the code points / names of the characters.
+UnicodeString
+Line::toString(UBool pretty) {
+    UnicodeString result;
+    if(!pretty) {
+        result.setTo(name);
+        if(expLen) {
+            result.append("/");
+            result.append(expansionString);
+        }
+        return result;
+    }
+
+    UErrorCode status = U_ZERO_ERROR;
+    UChar NFC[50];
+    int32_t NFCLen = unorm_normalize(name, len, UNORM_NFC, 0, NFC, 50, &status);
+    result.setTo(NFC, NFCLen);
+    if(expLen) {
+        result.append("/");
+        result.append(expansionString);
+    }
+    // descriptive part
+    result.append(" # ");
+    result.append(stringToName(NFC, NFCLen));
+    if(expLen) {
+        result.append("/ ");
+        result.append(stringToName(expansionString, expLen));
+    }
+    return result;
+}
+
+
+// Replaces the name with the contents of `string` and recomputes the
+// combining classes of its first and last code point.
+// The copy is limited to 24 UChars so that the terminator still fits into
+// the 25-element name buffer, and an empty string produces an empty name
+// instead of reading name[-1].
+void
+Line::setTo(const UnicodeString &string) {
+    int32_t len = string.length();
+    if(len > 24) {
+        len = 24;
+    }
+    if(len <= 0) {
+        name[0] = 0;
+        this->len = 0;
+        firstCC = 0;
+        lastCC = 0;
+        return;
+    }
+    u_strncpy(name, string.getBuffer(), len);
+    name[len] = 0;
+    this->len = len;
+    UChar32 c;
+    U16_GET(name, 0, 0, len, c);
+    firstCC = u_getCombiningClass(c);
+    U16_GET(name, 0, len-1, len, c);
+    lastCC = u_getCombiningClass(c);
+}
+
+// Replaces the name with the single code point `n` (one or two UChars).
+void
+Line::setTo(const UChar32 n) {
+    UBool isError = FALSE;
+    // start from an empty name: this is an assignment, not an append
+    len = 0;
+    U16_APPEND(name, len, 25, n, isError);
+    name[len] = 0;
+    // one code point: first and last combining class coincide
+    lastCC = firstCC = u_getCombiningClass(n);
+}
+
+
+// Appends indentation for `strength` to `result` and returns a copy of it.
+// The indent is (strength + 1) * indentSize spaces, with UCOL_IDENTICAL
+// treated as level 5; values above UCOL_IDENTICAL add nothing.
+UnicodeString
+Line::strengthIndent(UColAttributeValue strength, int indentSize, UnicodeString &result)
+{
+    if(strength > UCOL_IDENTICAL) {
+        return result;
+    }
+    const int numIndents = (strength == UCOL_IDENTICAL) ? 5 : strength+1;
+    for(int remaining = numIndents*indentSize; remaining > 0; remaining--) {
+        result.append(" ");
+    }
+    return result;
+}
+
+// Returns the rule operator for `strength`, surrounded by spaces:
+// " = ", " < ", " << ", " <<< ", " <<<< ", " >? " for UCOL_OFF and " ?! "
+// for any other value.
+// pretty - plain-text output only: one extra leading space in front of the
+//          five real operators
+// html   - write '<' and '>' as the entities "&lt;" / "&gt;" so that the
+//          result is valid inside HTML text (previously they were emitted
+//          raw); `pretty` is ignored in this mode, as before
+UnicodeString
+Line::strengthToString(UColAttributeValue strength, UBool pretty, UBool html) {
+    UnicodeString result;
+    const char *lt = html ? "&lt;" : "<";
+    const char *gt = html ? "&gt;" : ">";
+
+    // number of '<' signs; 0 stands for '='
+    int32_t ltCount = 0;
+    switch(strength) {
+    case UCOL_IDENTICAL:
+        ltCount = 0;
+        break;
+    case UCOL_QUATERNARY:
+        ltCount = 4;
+        break;
+    case UCOL_TERTIARY:
+        ltCount = 3;
+        break;
+    case UCOL_SECONDARY:
+        ltCount = 2;
+        break;
+    case UCOL_PRIMARY:
+        ltCount = 1;
+        break;
+    case UCOL_OFF:
+        // not measured
+        result.append(" ");
+        result.append(gt);
+        result.append("? ");
+        return result;
+    default:
+        // unexpected value
+        result.append(" ?! ");
+        return result;
+    }
+
+    if(pretty && !html) {
+        result.append(" ");
+    }
+    result.append(" ");
+    if(ltCount == 0) {
+        result.append("=");
+    } else {
+        for(int32_t n = 0; n < ltCount; n++) {
+            result.append(lt);
+        }
+    }
+    result.append(" ");
+    return result;
+}
+
+// Walks the `next` chain starting after this line and returns the first
+// line whose strength is UCOL_IDENTICAL, or NULL if the chain ends first.
+Line *
+Line::nextInteresting() {
+    Line *candidate;
+    for(candidate = this->next; candidate != NULL; candidate = candidate->next) {
+        if(candidate->strength == UCOL_IDENTICAL) {
+            break;
+        }
+    }
+    return candidate;
+}
+
+// Appends the first `length` UChars of `n` to the name and sets lastCC to
+// the combining class of the last appended code point.
+void
+Line::append(const UChar* n, int32_t length)
+{
+    u_strncat(name, n, length);
+    len += length;
+    name[len] = 0;
+    UChar32 last;
+    U16_GET(n, 0, length-1, length, last);
+    lastCC = u_getCombiningClass(last);
+}
+
+// Appends the single code unit `n` to the name and updates lastCC.
+void
+Line::append(const UChar n)
+{
+    name[len++] = n;
+    name[len] = 0;
+    lastCC = u_getCombiningClass(n);
+}
+
+// Appends the name of `l` to this line; lastCC is taken over from `l`.
+void
+Line::append(const Line &l)
+{
+    const Line &tail = l;
+    append(tail.name, tail.len);
+    lastCC = tail.lastCC;
+}
+
+// Empties the name; every other member keeps its value.
+void
+Line::clear()
+{
+    len = 0;
+    name[0] = 0;
+}
+
+// Serialises this line into `buff` as text and returns the number of
+// characters written (excluding the terminating NUL). Layout:
+//   name units as 4-digit hex separated by spaces, "/",
+//   expansion units in the same form (may be empty), "; ",
+//   strength (2 digits) and a space, then strengthFromEmpty,
+//   cumulativeStrength and expStrength (2 digits each, no separators),
+//   isRemoved and isReset (1 digit each) and a space,
+//   expIndex as 8-digit hex.
+// Links, isContraction, isExpansion and the combining classes are not
+// written. The buffer length and status arguments are not used.
+// NOTE(review): nothing limits the output to the caller's buffer size, and
+// the running length assumes every field prints at exactly its minimum
+// width - confirm callers pass a large enough buffer.
+int32_t
+Line::write(char *buff, int32_t, UErrorCode &)
+{
+    int32_t resLen = 0;
+    int32_t i = 0;
+    // name: first unit without, following units with a leading space
+    sprintf(buff+resLen, "%04X", name[0]);
+    resLen += 4;
+    for(i = 1; i < len; i++) {
+        sprintf(buff+resLen, " %04X", name[i]);
+        resLen += 5;
+    }
+    sprintf(buff+resLen, "/");
+    resLen += 1;
+
+    // expansion, same format as the name; omitted when there is none
+    i = 0;
+    if(expLen) {
+        sprintf(buff+resLen, "%04X", expansionString[0]);
+        resLen += 4;
+        for(i = 1; i < expLen; i++) {
+            sprintf(buff+resLen, " %04X", expansionString[i]);
+            resLen += 5;
+        }
+    }
+    sprintf(buff+resLen, "; ");
+    resLen += 2;
+
+    // the four strength values
+    sprintf(buff+resLen, "%02i ", strength);
+    resLen += 3;
+    sprintf(buff+resLen, "%02i", strengthFromEmpty);
+    resLen += 2;
+    sprintf(buff+resLen, "%02i", cumulativeStrength);
+    resLen += 2;
+    sprintf(buff+resLen, "%02i", expStrength);
+    resLen += 2;
+
+    // Various flags. The only interesting ones are isReset and isRemoved. We will not output removed lines
+    //sprintf(buff+resLen, "%1i%1i%1i%1i ", isContraction, isExpansion, isRemoved, isReset);
+    //resLen += 5;
+    sprintf(buff+resLen, "%1i%1i ", isRemoved, isReset);
+    resLen += 3;
+
+    // first and last CC
+    // can be calculated on reading
+    //sprintf(buff+resLen, "%03i %03i ", firstCC, lastCC);
+    //resLen += 8;
+
+    sprintf(buff+resLen, "%08X", expIndex);
+    resLen += 8;
+
+    buff[resLen] = 0;
+
+    return resLen;
+}
+
+// Parses the text form produced by write() and fills in name, expansion,
+// strengths, the isRemoved / isReset flags and expIndex. isContraction is
+// derived from the name length, and the combining classes are recomputed.
+// The buffer length and status arguments are not used.
+//
+// Every sscanf conversion stores into an `unsigned int` or `int` temporary
+// that is then assigned to the member. Scanning straight into UChar (16 bit),
+// UBool (8 bit) or enum members made sscanf write 4 bytes into smaller
+// objects and overwrite neighbouring data. Decimal fields are read with %d,
+// because %i would treat a leading zero as an octal prefix.
+void
+Line::initFromString(const char *buff, int32_t, UErrorCode &)
+{
+    int32_t bufIndex = 0;
+    int32_t i = 0;
+    unsigned int hex = 0;
+    int value = 0;
+    int removedFlag = 0, resetFlag = 0;
+
+    // name: "XXXX" followed by any number of " XXXX", terminated by '/'
+    sscanf(buff+bufIndex, "%04X", &hex);
+    name[i] = (UChar)hex;
+    i++;
+    bufIndex += 4;
+    while(buff[bufIndex] != '/') {
+        sscanf(buff+bufIndex, " %04X", &hex);
+        name[i] = (UChar)hex;
+        i++;
+        bufIndex += 5;
+    }
+    len = i;
+    name[len] = 0;
+    bufIndex++;
+
+    isContraction = (i > 1) ? TRUE : FALSE;
+
+    // expansion: empty when the ';' follows immediately
+    if(buff[bufIndex] == ';') {
+        isExpansion = FALSE;
+        bufIndex += 2;
+        expansionString[0] = 0;
+        expLen = 0;
+    } else {
+        i = 0;
+        sscanf(buff+bufIndex, "%04X", &hex);
+        expansionString[i] = (UChar)hex;
+        i++;
+        bufIndex += 4;
+        while(buff[bufIndex] != ';') {
+            sscanf(buff+bufIndex, " %04X", &hex);
+            expansionString[i] = (UChar)hex;
+            i++;
+            bufIndex += 5;
+        }
+        expLen = i;
+        expansionString[expLen] = 0;
+        bufIndex += 2;
+    }
+
+    // the four strength values
+    sscanf(buff+bufIndex, "%02d ", &value);
+    strength = (UColAttributeValue)value;
+    bufIndex += 3;
+    sscanf(buff+bufIndex, "%02d", &value);
+    strengthFromEmpty = (UColAttributeValue)value;
+    bufIndex += 2;
+    sscanf(buff+bufIndex, "%02d", &value);
+    cumulativeStrength = (UColAttributeValue)value;
+    bufIndex += 2;
+    sscanf(buff+bufIndex, "%02d", &value);
+    expStrength = (UColAttributeValue)value;
+    bufIndex += 2;
+
+    // flags
+    sscanf(buff+bufIndex, "%1d%1d ", &removedFlag, &resetFlag);
+    isRemoved = (UBool)removedFlag;
+    isReset = (UBool)resetFlag;
+    bufIndex += 3;
+
+    sscanf(buff+bufIndex, "%08X", &hex);
+    expIndex = (int32_t)hex;
+    bufIndex += 8;
+
+    // calculate first and last CC
+    UChar32 c;
+    U16_GET(name, 0, 0, len, c);
+    firstCC = u_getCombiningClass(c);
+    U16_GET(name, 0, len-1, len, c);
+    lastCC = u_getCombiningClass(c);
+}
+
+// Inverts the case of every code point of `string` in place: uppercase
+// becomes lowercase and lowercase becomes uppercase. sLen is updated to the
+// length of the converted text.
+void
+Line::swapCase(UChar *string, int32_t &sLen)
+{
+    UChar swapped[256];
+    int32_t readPos = 0, writePos = 0;
+    UBool isError = FALSE;
+    while(readPos < sLen) {
+        UChar32 c = 0;
+        U16_NEXT(string, readPos, sLen, c);
+        if(u_isUUppercase(c)) {
+            c = u_tolower(c);
+        } else if(u_isULowercase(c)) {
+            c = u_toupper(c);
+        }
+        U16_APPEND(swapped, writePos, 256, c, isError);
+    }
+    swapped[writePos] = 0;
+    u_strcpy(string, swapped);
+    sLen = writePos;
+}
+
+
+// Inverts the case of both the name and the expansion of this line.
+void
+Line::swapCase()
+{
+    // name first, expansion second
+    swapCase(name, len);
+    swapCase(expansionString, expLen);
+}
+
+// Returns the sort key as text: every byte up to the first zero byte as two
+// hex digits followed by a space. Output stops once the 256-byte work buffer
+// is nearly full. An unset sort key gives an empty string.
+UnicodeString
+Line::dumpSortkey()
+{
+    char buffer[256];
+    buffer[0] = 0;
+    if(sortKey) {
+        int32_t used = 0;
+        // each byte takes three characters; the last write starts at 252
+        for(const uint8_t *key = sortKey; *key != 0 && used <= 252; key++, used += 3) {
+            sprintf(buffer+used, "%02X ", *key);
+        }
+    }
+    return UnicodeString(buffer);
+}
+
diff --git a/colprobe/line.h b/colprobe/line.h
new file mode 100755
index 0000000..31ce204
--- /dev/null
+++ b/colprobe/line.h
@@ -0,0 +1,113 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File line.h
+*
+* Modification History:
+*
+* Date Name Description
+* 03/18/2003 weiv Creation.
+*******************************************************************************
+*/
+
+//
+// class Line
+//
+// Each line from the source file (containing a name, presumably) gets
+// one of these structs.
+//
+
+#ifndef COLPROBE_LINE_H
+#define COLPROBE_LINE_H
+#include "unicode/utypes.h"
+#include "unicode/ucol.h"
+#include "unicode/ustring.h"
+#include "unicode/unistr.h"
+#include "unicode/uchar.h"
+#include "unicode/uniset.h"
+#include "colprobe.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+static const int MAX_EXPANSION_PREFIXES = 10;
+
+class Line {
+public:
+ static void copyArray(Line *dest, const Line *src, int32_t size);
+ Line();
+ Line(const Line &other);
+ Line(const UChar* name, int32_t len);
+ Line(const UnicodeString &string);
+ Line(const UChar name);
+ Line(const char *buff, int32_t buffLen, UErrorCode &status);
+ ~Line();
+ Line & operator=(const Line &other);
+ UBool operator==(const Line &other) const;
+ UBool operator!=(const Line &other) const;
+ void setToConcat(const Line *first, const Line *second);
+ void setName(const UChar* name, int32_t len);
+ UnicodeString toString(UBool pretty = FALSE);
+ UnicodeString toBundleString();
+ UnicodeString toHTMLString();
+ int32_t write(char *buff, int32_t buffLen, UErrorCode &status);
+ void initFromString(const char *buff, int32_t buffLen, UErrorCode &status);
+
+
+ UnicodeString strengthIndent(UColAttributeValue strength, int indentSize, UnicodeString &result);
+ UnicodeString strengthToString(UColAttributeValue strength, UBool pretty, UBool html = FALSE);
+ UnicodeString stringToName(UChar *string, int32_t len);
+ void setTo(const UnicodeString &string);
+ void setTo(const UChar32 n);
+ UBool equals(const Line &other) const;
+ Line *nextInteresting();
+ void append(const UChar n);
+ void append(const UChar* n, int32_t length);
+ void append(const Line &l);
+ void clear();
+ void swapCase(); // swaps the case of both name and expansionString
+ void swapCase(UChar *string, int32_t &sLen); // swaps the case of string in place and updates sLen
+ UnicodeString dumpSortkey(); // sortKey bytes as space-separated hex text
+ void init();
+
+
+public:
+ UChar name[25]; // UTF-16 text of this line; users zero-terminate it at name[len]
+ int32_t len; // number of UTF-16 code units used in name
+ UChar expansionString[25]; // UTF-16 text of the expansion, zero-terminated at expansionString[expLen]
+ int32_t expLen; // number of UTF-16 code units used in expansionString
+
+ UColAttributeValue strength; // SortedLines::sort() stores the strength relative to the preceding sorted line here
+ UColAttributeValue strengthFromEmpty; // filled from StrengthProbe::distanceFromEmptyString()
+ UColAttributeValue cumulativeStrength;
+ UColAttributeValue expStrength;
+
+ Line *previous; // neighbours in sorted order; set by SortedLines::sort() when linking is requested
+ Line *next;
+
+ // In case this element is a contraction
+ // we keep a pointer at which lines were components
+ Line *left;
+ Line *right;
+
+ UBool isContraction;
+ UBool isExpansion;
+ UBool isRemoved;
+ UBool isReset;
+
+ int32_t expIndex;
+ uint8_t firstCC; // combining class of the first code point of name
+ uint8_t lastCC; // combining class of the last code point of name
+
+ uint8_t *sortKey; // may be NULL; dumpSortkey() reads it up to the first zero byte
+public:
+ static UnicodeSet *needsQuoting;
+};
+
+
+#endif //COLPROBE_LINE_H
diff --git a/colprobe/locale.txt b/colprobe/locale.txt
new file mode 100755
index 0000000..324ab56
--- /dev/null
+++ b/colprobe/locale.txt
@@ -0,0 +1,241 @@
+af
+af_ZA
+am
+am_ET
+ar
+ar_AE
+ar_BH
+ar_DZ
+ar_EG
+ar_IN
+ar_IQ
+ar_JO
+ar_KW
+ar_LB
+ar_LY
+ar_MA
+ar_OM
+ar_QA
+ar_SA
+ar_SD
+ar_SY
+ar_TN
+ar_YE
+be
+be_BY
+bg
+bg_BG
+bn
+bn_IN
+ca
+ca_ES
+#ca_ES_PREEURO
+cs
+cs_CZ
+da
+da_DK
+de
+de_AT
+#de_AT_PREEURO
+de_BE
+de_CH
+de_DE
+#de_DE_PREEURO
+de_LU
+#de_LU_PREEURO
+de__PHONEBOOK
+el
+el_GR
+#el_GR_PREEURO
+en
+en_AU
+en_BE
+#en_BE_PREEURO
+en_BW
+en_CA
+en_GB
+#en_GB_EURO
+en_HK
+en_IE
+#en_IE_PREEURO
+en_IN
+en_MT
+en_NZ
+en_PH
+en_SG
+en_US
+en_US_POSIX
+en_VI
+en_ZA
+en_ZW
+eo
+es
+es_AR
+es_BO
+es_CL
+es_CO
+es_CR
+es_DO
+es_EC
+es_ES
+#es_ES_PREEURO
+es_GT
+es_HN
+es_MX
+es_NI
+es_PA
+es_PE
+es_PR
+es_PY
+es_SV
+es_US
+es_UY
+es_VE
+es__TRADITIONAL
+et
+et_EE
+eu
+eu_ES
+#eu_ES_PREEURO
+fa
+fa_AF
+fa_IR
+fi
+fi_FI
+#fi_FI_PREEURO
+fo
+fo_FO
+fr
+fr_BE
+#fr_BE_PREEURO
+fr_CA
+fr_CH
+fr_FR
+#fr_FR_PREEURO
+fr_LU
+#fr_LU_PREEURO
+ga
+ga_IE
+#ga_IE_PREEURO
+gl
+gl_ES
+#gl_ES_PREEURO
+gu
+gu_IN
+gv
+gv_GB
+he
+he_IL
+hi
+hi_IN
+hi__DIRECT
+hr
+hr_HR
+hu
+hu_HU
+hy
+hy_AM
+hy_AM_REVISED
+id
+id_ID
+is
+is_IS
+it
+it_CH
+it_IT
+#it_IT_PREEURO
+ja
+ja_JP
+#ja_JP_TRADITIONAL
+kk_KZ
+kl
+kl_GL
+kn
+kn_IN
+ko
+ko_KR
+kok
+kok_IN
+kw
+kw_GB
+lt
+lt_LT
+lv
+lv_LV
+mk
+mk_MK
+mr
+mr_IN
+ms_MY
+mt
+mt_MT
+nb
+nb_NO
+nl
+nl_BE
+#nl_BE_PREEURO
+nl_NL
+#nl_NL_PREEURO
+nn
+nn_NO
+om
+om_ET
+om_KE
+pl
+pl_PL
+ps
+ps_AF
+pt
+pt_BR
+pt_PT
+#pt_PT_PREEURO
+ro
+ro_RO
+ru
+ru_RU
+ru_UA
+sh
+sh_YU
+sk
+sk_SK
+sl
+sl_SI
+so
+so_DJ
+so_ET
+so_KE
+so_SO
+sq
+sq_AL
+sr
+sr_YU
+sv
+sv_FI
+sv_SE
+sw
+sw_KE
+sw_TZ
+ta
+ta_IN
+te
+te_IN
+th
+th_TH
+#th_TH_TRADITIONAL
+ti
+ti_ER
+ti_ET
+tr
+tr_TR
+uk
+uk_UA
+vi
+vi_VN
+zh
+zh_CN
+zh_HK
+zh_MO
+zh_SG
+zh_TW
+zh_TW_STROKE
+zh__PINYIN
diff --git a/colprobe/longname.cpp b/colprobe/longname.cpp
new file mode 100755
index 0000000..71ba45d
--- /dev/null
+++ b/colprobe/longname.cpp
@@ -0,0 +1,48 @@
+#include "unicode/unistr.h"
+#include "unicode/locid.h"
+#include "unicode/ucnv.h"
+#include <stdio.h>
+
+// Prints the English display name of the locale in argv[1] as UTF-8: language[_country][_variant].
+int main(int argc, char* argv[])
+{
+    if(argc < 2) { // argv[1] is the locale ID and is required
+        fprintf(stderr, "usage: longname <locale>\n");
+        return 1;
+    }
+    UErrorCode status = U_ZERO_ERROR;
+    const char *loc = argv[1];
+    int32_t hasCountry;
+    UConverter *conv = ucnv_open("utf8", &status);
+    if(U_FAILURE(status)) {
+        fprintf(stderr, "cannot open the UTF-8 converter: %s\n", u_errorName(status));
+        return 1;
+    }
+    UChar UBuffer[256];
+    int32_t uBufLen = 0;
+    char buffer[256];
+    int32_t bufLen = 0;
+    uBufLen = uloc_getDisplayLanguage(loc, "en", UBuffer, 256, &status);
+    bufLen = ucnv_fromUChars(conv, buffer, 256, UBuffer, uBufLen, &status);
+    buffer[bufLen < 255 ? bufLen : 255] = 0; // the reported length can be >= the capacity
+    printf("%s", buffer);
+
+    if((hasCountry = uloc_getCountry(loc, buffer, 256, &status)) != 0) {
+        uBufLen = uloc_getDisplayCountry(loc, "en", UBuffer, 256, &status);
+        bufLen = ucnv_fromUChars(conv, buffer, 256, UBuffer, uBufLen, &status);
+        buffer[bufLen < 255 ? bufLen : 255] = 0;
+        printf("_%s", buffer);
+    }
+    if(uloc_getVariant(loc, buffer, 256, &status)) {
+        uBufLen = uloc_getDisplayVariant(loc, "en", UBuffer, 256, &status);
+        bufLen = ucnv_fromUChars(conv, buffer, 256, UBuffer, uBufLen, &status);
+        buffer[bufLen < 255 ? bufLen : 255] = 0;
+        if(!hasCountry) { // keep the empty country slot visible: language__variant
+            printf("_");
+        }
+        printf("_%s", buffer);
+    }
+    printf("\n");
+    ucnv_close(conv);
+    return 0;
+}
diff --git a/colprobe/readme.txt b/colprobe/readme.txt
new file mode 100755
index 0000000..fd2914a
--- /dev/null
+++ b/colprobe/readme.txt
@@ -0,0 +1,4 @@
+There are several tools in this directory that should make it easier to generate collation data:
+extractCollationData.pl - perl script that reads ICU resource bundle files and outputs a locale_collation.html file if collation elements are present in the locale. Arguments are the list of locale source files (*.txt) that need to be processed.
+createComparisonTables.pl - takes a locale name. Looks in directories that should contain the html data produced by colprobe or extractCollationData.
+tableStarter.pl - invokes createComparisonTables.pl with a list of locales.
diff --git a/colprobe/sortedlines.cpp b/colprobe/sortedlines.cpp
new file mode 100755
index 0000000..a4f3dfe
--- /dev/null
+++ b/colprobe/sortedlines.cpp
@@ -0,0 +1,2067 @@
+#include "sortedlines.h"
+// qsort() comparator: orders two Line* array slots by u_strcmp() of the lines' names.
+static int codePointCmp(const void *a, const void *b) {
+    return u_strcmp((*(Line **)a)->name, (*(Line **)b)->name);
+}
+
+SortedLines::SortedLines(const UnicodeSet &set, const UnicodeSet &excludeBounds, const StrengthProbe &probe,
+ UPrinter *logger, UPrinter *debug) : // set: repertoire to analyse; excludeBounds: strings getBounds() must not pick as bounds
+toSort(NULL),
+toSortCapacity(0),
+lines(NULL),
+size(0),
+capacity(0),
+repertoire(set),
+excludeBounds(excludeBounds),
+probe(probe),
+first(NULL),
+last(NULL),
+logger(logger),
+debug(debug),
+contractionsTable(NULL),
+duplicators(NULL),
+maxExpansionPrefixSize(0),
+wordSort(FALSE),
+frenchSecondary(FALSE),
+upperFirst(FALSE),
+sortkeys(NULL),
+sortkeyOffset(0)
+{
+ memset(UB, 0, sizeof(UB));
+ int32_t i = 0;
+ for(i = 0; i < UCOL_OFF; i++) {
+ UB[i] = &empty; // was the character U+2205, a decoded "&empty;" entity; `empty` is presumably the member declared in sortedlines.h - confirm
+ }
+ init(); // allocates lines[] and fills it from the repertoire
+}
+
+// Frees everything this object allocated itself: the line storage, the sort-key
+// buffer, the sorting array and both hash tables. delete and delete[] are no-ops
+// on null pointers, so the members need no explicit NULL tests.
+// Lines that updateBounds() allocates into UB[] are not released here.
+SortedLines::~SortedLines()
+{
+    delete[] lines;
+
+    // optional members, NULL until first needed
+    delete[] sortkeys;
+    delete[] toSort;
+
+    // hash tables created during analysis
+    delete contractionsTable;
+    delete duplicators;
+}
+
+void
+SortedLines::getBounds(UErrorCode &status) {
+ // Walk the sorted lines: each time a stronger distance-from-empty level starts, the closest preceding non-excluded line becomes the bound of the level just left.
+ debug->log(toString(), TRUE);
+ int32_t i = 0, j = 0;
+ UColAttributeValue strength = UCOL_OFF;
+ for(i = 0; i < size; i++) {
+ if(toSort[i]->strengthFromEmpty < strength) {
+ if(i && strength < UCOL_OFF) {
+ // step back over lines that are listed in excludeBounds; j < i keeps the index i-j from going below 0
+ j = 1;
+ while(j < i && excludeBounds.contains(UnicodeString(toSort[i-j]->name, toSort[i-j]->len))) {
+ j++;
+ }
+ UB[strength] = toSort[i-j];
+ }
+ strength = toSort[i]->strengthFromEmpty;
+ if(strength == UCOL_PRIMARY) {
+ probe.SE = toSort[i]->name[0];
+ }
+ }
+ }
+ // The bound for the last (strongest) level is chosen below.
+ // a different solution for bounds: go from end and see if the guys on the top
+ // cause duplication for things
+ UChar dupch[] = { 0x0020, 0x0030, 0x0042, 0x0051, 0x0062, 0x0071, 0x0391, 0x0396, 0x03b1, 0x03b6 };
+ j = 1;
+ Line dup;
+ Line bound;
+ int32_t dups = 0;
+ while(j < size) {
+ dups = 0;
+ for(i = 0; i < (int32_t)(sizeof(dupch)/sizeof(dupch[0])); i++) { // cast avoids a signed/unsigned comparison
+ dup.setTo(dupch[i]);
+ dup.append(dupch[i]);
+ bound.setTo(dupch[i]);
+ bound.append(toSort[size-j]->name, toSort[size-j]->len);
+ if(probe.getStrength(dup, bound) >= UCOL_IDENTICAL) { // "xx" vs "x"+candidate are not told apart: candidate is a duplicator
+ dups++;
+ }
+ }
+ if(dups == 0) {
+ break;
+ } else {
+ if(!duplicators) {
+ duplicators = new Hashtable();
+ }
+ duplicators->put(UnicodeString(toSort[size-j]->name, toSort[size-j]->len), &toSort[size-j], status); // NOTE(review): stores the address of the toSort slot, not the Line*; only tested against NULL in the code visible here
+ debug->log(toSort[size-j]->toString());
+ debug->log(" is not good enough to be an upper bound\n");
+ j++;
+ }
+ }
+ if(j == size) {
+ debug->log("Oi! I'm hallucinating. Will use the first upper bound");
+ delete duplicators;
+ duplicators = NULL;
+ j = 1;
+ }
+/*
+ j = 1;
+ while(excludeBounds.contains(UnicodeString(toSort[size-j]->name, toSort[size-j]->len))) {
+ j++;
+ }
+*/
+ UB[strength] = toSort[size-j]; // NOTE(review): with an empty repertoire this reads toSort[-1] and strength is still UCOL_OFF - confirm callers never get here with size == 0
+ for(i = 0; i < UCOL_OFF; i++) {
+ if(UB[i]) {
+ // dump every bound that is set
+ debug->log(UB[i]->toString(TRUE), TRUE);
+ }
+ }
+}
+
+// Stores in every sorted line the strength of its difference from the empty string
+// (strengthFromEmpty); out-of-order values are interpolated from the neighbouring lines.
+void
+SortedLines::classifyRepertoire() {
+ UColAttributeValue strongestStrengthFromEmpty = UCOL_OFF;
+ int32_t lastChange = 0; // only consumed by the commented-out block further down
+ int32_t i = 0, j = 0;
+ while(i < size) // && probe.distanceFromEmptyString(*toSort[i]) > UCOL_PRIMARY)
+ {
+ toSort[i]->strengthFromEmpty = probe.distanceFromEmptyString(*toSort[i]);
+ if(toSort[i]->strengthFromEmpty < strongestStrengthFromEmpty) {
+ strongestStrengthFromEmpty = toSort[i]->strengthFromEmpty;
+ lastChange = i;
+ } else if (toSort[i]->strengthFromEmpty > strongestStrengthFromEmpty) {
+ // there is a problem in detection. Most probably a quaternary.
+ // why don't we try to interpolate
+ UColAttributeValue nextStrength = UCOL_OFF;
+ UColAttributeValue prevStrength = UCOL_OFF;
+ UColAttributeValue st = UCOL_OFF;
+
+ logger->log("Interpolating to get the distance from empty for Line ");
+ logger->log(toSort[i]->toString(TRUE), TRUE);
+
+ if(i) { // estimate from the previous line
+ st = probe.getStrength(*toSort[i-1], *toSort[i]);
+ if(st == UCOL_OFF) {
+ logger->log("Cannot deduce distance from empty using previous element. Something is very wrong! Line:");
+ logger->log(toSort[i]->toString(TRUE), TRUE);
+ } else if(st == UCOL_IDENTICAL || st >= toSort[i-1]->strengthFromEmpty) {
+ prevStrength = toSort[i-1]->strengthFromEmpty;
+ } else if(st < toSort[i-1]->strengthFromEmpty) {
+ prevStrength = st;
+ }
+ toSort[i]->strengthFromEmpty = prevStrength;
+ }
+ if(i < size-2) { // estimate from the next line; NOTE(review): size-2 skips this for the next-to-last line - confirm size-1 was not intended
+ toSort[i+1]->strengthFromEmpty = probe.distanceFromEmptyString(*toSort[i+1]);
+ st = probe.getStrength(*toSort[i+1], *toSort[i]);
+ if(st == UCOL_OFF) {
+ logger->log("Cannot deduce distance from empty using next element. Something is very wrong! Line:");
+ logger->log(toSort[i]->toString(TRUE), TRUE);
+ } else if(st == UCOL_IDENTICAL || st < toSort[i+1]->strengthFromEmpty) {
+ nextStrength = toSort[i+1]->strengthFromEmpty;
+ } else if(st >= toSort[i+1]->strengthFromEmpty) {
+ nextStrength = st;
+ }
+ if(i) {
+ if(prevStrength != nextStrength) {
+ logger->log("Inconsistent results from interpolation! Results will most likely be wrong\n");
+ }
+ }
+ toSort[i]->strengthFromEmpty = nextStrength; // the estimate from the next line overrides the one from the previous line
+ }
+ /*
+ UColAttributeValue problemStrength = UCOL_PRIMARY;
+ for(j = lastChange; j < i ; j++) {
+ if(toSort[j]->strength > problemStrength) {
+ problemStrength = toSort[j]->strength;
+ }
+ }
+ for(j = lastChange; j < i ; j++) {
+ toSort[j]->strengthFromEmpty = problemStrength;
+ }
+ strongestStrengthFromEmpty = toSort[i]->strengthFromEmpty;
+ lastChange = i;
+ debug->log("Problem detected in distances from empty. Most probably word sort is on\n");
+ */
+ wordSort = TRUE; // any out-of-order distance flags word sort (see the disabled message above)
+ }
+ i++;
+ }
+ debug->log("Distances from empty string\n");
+ debug->log(toStringFromEmpty(), TRUE);
+}
+
+void
+SortedLines::analyse(UErrorCode &status) { // runs the whole analysis; returns early with status set if a probe test fails
+ frenchSecondary = probe.isFrenchSecondary(status);
+ if(U_FAILURE(status)) {
+ logger->log("Test for French secondary failed. Bailing out!\n");
+ return;
+ }
+ logger->log("French secondary value is %i\n", frenchSecondary, frenchSecondary); // NOTE(review): the value is passed twice; UPrinter::log's signature is not visible here
+ upperFirst = probe.isUpperFirst(status);
+ if(U_FAILURE(status)) {
+ logger->log("Test for upper first failed. Bailing out!\n");
+ return;
+ }
+ logger->log("upper first value is %i\n", upperFirst, upperFirst);
+ sort(TRUE, TRUE); // sort, compute strengths between neighbours and link the lines
+ classifyRepertoire(); // needs the sorted toSort[] produced above
+ getBounds(status); // needs strengthFromEmpty set by classifyRepertoire()
+ //sort(TRUE, TRUE);
+ addContractionsToRepertoire(status); // uses the bounds in UB[]
+ //sort(TRUE, TRUE);
+ debug->log("\n*** Order after detecting contractions\n\n");
+ calculateSortKeys();
+ debug->log(toPrettyString(FALSE, TRUE), TRUE);
+ detectExpansions();
+}
+
+void SortedLines::init()
+{
+ size = repertoire.size(); // one Line per element of the repertoire set
+ capacity = 5*size; // headroom for the lines appended later through addAll()
+ lines = new Line[capacity];
+ init(repertoire, lines); // fills the first `size` entries
+}
+
+// Fills lin[] with one Line per element of rep, in the order the set iterator
+// yields them; an element is either a string or a single code point.
+// lin must have room for every element of rep.
+void SortedLines::init(UnicodeSet &rep, Line *lin)
+{
+    UnicodeSetIterator elements(rep);
+    Line *slot = lin; // next unused entry of lin[]
+    while(elements.next()) {
+        if(elements.isString()) {
+            slot->setTo(elements.getString());
+        } else {
+            slot->setTo(elements.getCodepoint());
+        }
+        // zero terminate, so that name can also be used as a C-style string
+        slot->name[slot->len] = 0;
+        slot++;
+    }
+}
+
+// Points each of the first sizeToSort slots of sortingArray at the matching element of elements.
+void SortedLines::setSortingArray(Line **sortingArray, Line *elements, int32_t sizeToSort) {
+    Line **slot = sortingArray;
+    for(Line *element = elements; element < elements + sizeToSort; element++) {
+        *slot++ = element;
+    }
+}
+
+// Copies the value pointer of every entry of table into sortingArray; returns table->count().
+int32_t SortedLines::setSortingArray(Line **sortingArray, Hashtable *table) {
+    const int32_t entryCount = table->count();
+    int32_t cursor = -1; // iteration state handed to nextElement(), initial value as before
+    Line **out = sortingArray;
+    const UHashElement *entry = NULL;
+    while((entry = table->nextElement(cursor)) != NULL) {
+        *out++ = (Line *)entry->value.pointer;
+    }
+    return entryCount;
+}
+
+void
+SortedLines::sort(Line **sortingArray, int32_t sizeToSort, UBool setStrengths, UBool link) { // sorts the pointer array with probe.comparer; optionally sets strengths and links neighbours
+ int32_t i = 0;
+ int32_t equalStart = 0;
+ UColAttributeValue equalStrength = UCOL_OFF;
+
+ qsort(sortingArray, sizeToSort, sizeof(Line *), probe.comparer);
+
+ if(setStrengths) { // analyze strengths
+ for(i = 1; i < sizeToSort; i++) { // element 0 has no predecessor, so its strength is left untouched
+ sortingArray[i]->strength = probe.getStrength(*sortingArray[i-1], *sortingArray[i]);
+ }
+ // for equal guys, do the code point ordering
+
+ i = 1;
+ while(i < sizeToSort)
+ {
+ if(sortingArray[i]->strength == UCOL_IDENTICAL) {
+ equalStart = i - 1; // the run of equal lines starts with the line before the first IDENTICAL one
+ equalStrength = sortingArray[equalStart]->strength;
+ sortingArray[equalStart]->strength = UCOL_IDENTICAL; // every line of the run is IDENTICAL while it is re-sorted
+ while(i < sizeToSort && sortingArray[i]->strength == UCOL_IDENTICAL) {
+ i++;
+ }
+ qsort(sortingArray+equalStart, i-equalStart, sizeof(Line *), codePointCmp);
+ sortingArray[equalStart]->strength = equalStrength; // whichever line is first in the run now gets the saved strength
+ } else {
+ i++;
+ }
+ }
+
+ }
+
+
+
+ if(link) { // do the linking
+ for(i = 0; i < sizeToSort - 1; i++) { // chain neighbours; the outer ends' previous/next are not touched here
+ Line *curr = *(sortingArray+i);
+ curr->next = *(sortingArray+i+1);
+ (*(sortingArray+i+1))->previous = curr;
+ }
+ }
+}
+
+// Sorts all of lines[] through the toSort pointer array, which is (re)allocated at
+// twice the current size whenever it is missing or too small. When link is set,
+// first/last are set to the ends of the sorted order; otherwise both become NULL.
+void SortedLines::sort(UBool setStrengths, UBool link) {
+    const UBool needsNewArray = !toSort || toSortCapacity < size;
+    if(needsNewArray) {
+        delete[] toSort; // harmless when toSort is still NULL
+        toSort = new Line*[size*2];
+        toSortCapacity = size*2;
+    }
+    setSortingArray(toSort, lines, size);
+    sort(toSort, size, setStrengths, link);
+
+    if(link) {
+        first = toSort[0];
+        last = toSort[size-1];
+    } else {
+        first = last = NULL;
+    }
+}
+
+void
+SortedLines::updateBounds(UnicodeSet &set) { // raises UB[] for every element of set that compares greater than the current bound of its strength
+ Line line;
+ UnicodeString s1;
+ UnicodeSetIterator it1(set);
+ while(it1.next()) {
+ if(!debug->isOn()) {
+ logger->log("."); // progress indicator when debug output is off
+ }
+ if(it1.isString()) { // process a string
+ s1.setTo(it1.getString());
+ } else { // process code point
+ s1.setTo(it1.getCodepoint());
+ }
+ //line.setTo(s1);
+ UColAttributeValue strength = probe.distanceFromEmptyString(s1); // NOTE(review): used as an index into UB[] without a range check - confirm it can never be UCOL_OFF here
+ if(probe.compare(UnicodeString(UB[strength]->name), s1) < 0) { // relies on name being zero-terminated
+ // TODO: leak here - fixit!
+ UB[strength] = new Line(s1); // never deleted: the destructor does not walk UB[]
+ //u_strcpy(UB[strength], s1.getTerminatedBuffer());
+ }
+ }
+
+
+
+}
+
+// Appends copies of the first toAddSize elements of toAdd to lines[]. lines[] is not
+// grown (toSort and UB hold pointers into it), so whatever does not fit is dropped.
+void SortedLines::addAll(Line* toAdd, int32_t toAddSize)
+{
+    if(size+toAddSize > capacity) {
+        logger->log("Not enough room for all new lines. Dropping the ones that do not fit.\n");
+        toAddSize = capacity > size ? capacity - size : 0;
+    }
+    for(int32_t i = 0; i < toAddSize; i++) {
+        lines[size+i] = toAdd[i];
+    }
+    size += toAddSize;
+}
+
+// Stores probe.distanceFromEmptyString() of each of the first arraySize lines in its strengthFromEmpty.
+void SortedLines::setDistancesFromEmpty(Line* array, int32_t arraySize)
+{
+    for(Line *line = array; line < array + arraySize; line++) {
+        line->strengthFromEmpty = probe.distanceFromEmptyString(*line);
+    }
+}
+
+
+// Detects contractions among the sorted lines, appends them to lines[] and re-sorts; returns the number of contractions noted.
+int32_t SortedLines::addContractionsToRepertoire(UErrorCode &status)
+{
+ logger->log("\n*** Detecting contractions\n\n");
+ contractionsTable = new Hashtable();
+ int32_t noConts = 0;
+ int32_t allocateSize = 50*size;
+ // first check for simple contractions
+ Line* delta = new Line[allocateSize];
+ Line** deltaSorted = new Line*[allocateSize];
+ Line* lesserToAddTo = new Line[allocateSize];
+ Line* newDelta = new Line[allocateSize];
+ Line** newDeltaSorted = new Line*[allocateSize];
+ Line* deltaP = delta; // batch found by the previous generation (delta and newDelta alternate)
+ Line** deltaPP = deltaSorted; // sorted pointer view of that batch
+ Line* newDeltaP = newDelta; // batch being filled by the current generation
+ int32_t deltaSize = 0, lesserToAddToSize = 0, newDeltaSize = 0;
+ logger->log("++ Contraction detection generation 0\n");
+ noConts = detectContractions(toSort, size, toSort, size,
+ delta, deltaSize, lesserToAddTo, lesserToAddToSize, 3*size, status);
+ setSortingArray(deltaSorted, delta, deltaSize);
+ sort(deltaSorted, deltaSize, TRUE);
+
+ setDistancesFromEmpty(delta, deltaSize);
+ int32_t deltaPSize = deltaSize;
+ //updateBounds(delta);
+
+ int32_t generation = 0;
+ // if we found any, we have to try multiple contractions
+ // However, we want to prevent the contractions explosion
+ // if the number of simple contractions is greater than the
+ // starting size, chances are that we either have an algorithmic
+ // contraction (like iteration marks on w2k) or something
+ // is seriously wrong.
+ if(deltaPSize < size/2) {
+ while (deltaPSize && generation < 1) {
+ generation++;
+ logger->log("\n++ Contraction detection generation %i\n", generation, generation);
+ // find more, but avoid testing the combinations we already have
+ noConts += detectContractions(toSort, size, deltaPP, deltaPSize,
+ newDeltaP, newDeltaSize, lesserToAddTo, lesserToAddToSize, 3*size, status);
+ noConts += detectContractions(deltaPP, deltaPSize, toSort, size,
+ newDeltaP, newDeltaSize, lesserToAddTo, lesserToAddToSize, 3*size, status);
+ calculateSortKeys();
+
+ addAll(deltaP, deltaPSize);
+ // no explicit setSortingArray(toSort, ...) here: sort() re-sizes toSort for the grown size before filling it
+ sort(TRUE, TRUE);
+ setSortingArray(newDeltaSorted, newDeltaP, newDeltaSize);
+ sort(newDeltaSorted, newDeltaSize, TRUE);
+
+ // if no new ones, bail
+ //if (newDeltaSize == 0) break;
+
+ deltaPSize = newDeltaSize;
+ newDeltaSize = 0;
+ if(deltaP == delta) {
+ deltaP = newDelta;
+ deltaPP = newDeltaSorted;
+ newDeltaP = delta;
+ } else {
+ deltaP = delta;
+ deltaPP = deltaSorted;
+ newDeltaP = newDelta;
+ }
+ setDistancesFromEmpty(deltaP, deltaPSize);
+ }
+ }
+ status = U_ZERO_ERROR; // detection reports "too many contractions" through status; that is not an error for the caller
+ // add stuff from the last batch
+ addAll(deltaP, deltaPSize);
+
+ // warning: we don't add the lesser ones in recursively, since they will
+ // infinitely loop
+ setDistancesFromEmpty(lesserToAddTo, lesserToAddToSize);
+ addAll(lesserToAddTo, lesserToAddToSize);
+ // toSort may be too small for the grown size; sort() re-allocates it before filling it
+ sort(TRUE, TRUE);
+
+ delete[] deltaSorted;
+ delete[] delta;
+ delete[] lesserToAddTo;
+ delete[] newDeltaSorted;
+ delete[] newDelta;
+ return noConts;
+}
+
+
+int32_t SortedLines::detectContractions(Line **firstRep, int32_t firstSize, // candidates for the left part
+ Line **secondRep, int32_t secondSize, // candidates for the right part
+ Line *toAddTo, int32_t &toAddToSize, // receives the contractions found
+ Line *lesserToAddTo, int32_t &lesserToAddToSize, // receives combinations that sort at or below their left part
+ int32_t capacity, UErrorCode &status) // returns the number of contractions noted; stops once it equals capacity
+{
+ int32_t noConts = 0;
+ int i = 0, j = 0, k = 0;
+ Line lower, upper, trial, toAdd, helper;
+ UChar32 firstStart, firstEnd, secondStart;
+ UChar NFCTrial[256];
+ int32_t NFCTrialLen = 0;
+ UBool thai;
+ i = -1;
+ while(i < firstSize-1 && U_SUCCESS(status)) {
+ i++;
+ if(!debug->isOn()) {
+ logger->log("\rTesting %05i/%05i. Found %05i conts.", i, firstSize, noConts);
+ }
+ U16_GET(firstRep[i]->name, 0, 0, firstRep[i]->len, firstStart);
+ if(uscript_getScript(firstStart, &status) == USCRIPT_HAN || firstRep[i]->strengthFromEmpty > UCOL_PRIMARY) //UCOL_TERTIARY)
+ {
+ continue;
+ }
+ lower = *firstRep[i];
+ for(j = 0; j < secondSize; j++) {
+ if(noConts == capacity) {
+ return noConts;
+ }
+ U16_GET(secondRep[j]->name, 0, 0, secondRep[j]->len, secondStart);
+ if(firstStart == 0x41 && secondStart == 0x308) {
+ int32_t putBreakPointHere = 0;
+ }
+ if(uscript_getScript(secondStart, &status) == USCRIPT_HAN) // || secondRep[j]->strengthFromEmpty > UCOL_TERTIARY)
+ {
+ continue;
+ }
+ if(duplicators && duplicators->get(UnicodeString(secondRep[j]->name, secondRep[j]->len)) != NULL) {
+ debug->log("Skipping duplicator ");
+ debug->log(secondRep[j]->toString(), TRUE);
+ continue;
+ }
+
+ if(firstRep[i]->name[0] == 0x61 && secondRep[j]->name[0] == 0x308) {
+ int32_t putBreakpointhere = 0;
+ }
+ upper.setToConcat(firstRep[i], UB[UCOL_PRIMARY]);
+ //upper.setToConcat(firstRep[i], UB[secondRep[j]->strengthFromEmpty]);
+ toAdd.setToConcat(firstRep[i], secondRep[j]);
+ U16_GET(firstRep[i]->name, 0, firstRep[i]->len-1, firstRep[i]->len, firstEnd);
+ if((thai = u_hasBinaryProperty(firstEnd, UCHAR_LOGICAL_ORDER_EXCEPTION))) {
+ // this means that the lower is single reordering character
+ // if we do the lower test without taking this into account,
+ // we'll comparing the secondRep directly to Thai. We add UB[UCOL_PRIMARY] to
+ // end of lower and in the middle of trial, so we will have
+ // lower = Thai + UB, trial Thai + UB + x, resolving to
+ // UB + Thai vs UB + Thai + x.
+ // for upper bound, we do the similar, so we have
+ // upper = Thai + UB + UB, trial = Thai + UB + x,
+ // resolving to UB + Thai + UB vs UB + Thai + x
+ if(secondRep[j]->firstCC) {
+ UChar32 UBChar;
+ U16_GET(UB[UCOL_SECONDARY]->name, 0, 0, UB[UCOL_SECONDARY]->len, UBChar);
+ if(secondRep[j]->firstCC > u_getCombiningClass(UBChar)) {
+ continue;
+ }
+ }
+ upper = *firstRep[i];
+ upper.append(*UB[UCOL_PRIMARY]);
+ upper.append(*UB[UCOL_PRIMARY]);
+ lower = *firstRep[i]; // start from a fresh copy: the "continue" statements below skip the restore at the end of the loop body, which made lower grow with every iteration
+ lower.append(*UB[UCOL_PRIMARY]);
+ trial = *firstRep[i];
+ trial.append(*UB[UCOL_PRIMARY]);
+ trial.append(*secondRep[j]);
+ } else if((firstRep[i]->lastCC > secondRep[j]->firstCC && secondRep[j]->firstCC && !frenchSecondary)
+ || (firstRep[i]->firstCC < secondRep[j]->lastCC && firstRep[i]->firstCC && frenchSecondary)) {
+ // Skip because normalization will reorder
+ // there will be a chance to check this again, since if we
+ // try a+b, we will also try b+a
+ continue;
+ } else if(frenchSecondary && (firstRep[i]->strengthFromEmpty > UCOL_PRIMARY && secondRep[j]->strengthFromEmpty > UCOL_PRIMARY)) {
+ continue;
+ }else if(firstRep[i]->lastCC && secondRep[j]->firstCC && frenchSecondary) {
+ trial.setToConcat(secondRep[j], firstRep[i]);
+ } else {
+ trial.setToConcat(firstRep[i], secondRep[j]);
+ }
+ // Now let's check the trial. The problem is that when you combine characters,
+ // you can end up with concatenation that is unknown for the examined API.
+ NFCTrialLen = unorm_normalize(trial.name, trial.len, UNORM_NFC, 0, NFCTrial, 256, &status);
+ if((u_strcmp(trial.name, NFCTrial) == 0) || u_strFindLast(NFCTrial, NFCTrialLen, secondRep[j]->name, secondRep[j]->len)) {
+ if(secondRep[j]->strengthFromEmpty > UCOL_TERTIARY) {
+ continue;
+ }
+ }
+ UChar32 c;
+ U16_GET(NFCTrial, 0, 0, NFCTrialLen, c);
+ helper.setTo(c);
+ if(probe.distanceFromEmptyString(helper) > UCOL_TERTIARY) {
+ continue;
+ }
+ if(NFCTrialLen > 1) {
+ U16_GET(NFCTrial, 0, NFCTrialLen-1, NFCTrialLen, c);
+ helper.setTo(c);
+ if(probe.distanceFromEmptyString(helper) > UCOL_TERTIARY) {
+ continue;
+ }
+ }
+
+ if (probe.compare(lower, trial) >= 0) { // if lower is bigger than trial
+ // this might be ok, but I'm having doubts. Here is an additional check:
+ if(firstRep[i]->len == 1 || secondRep[j]->strengthFromEmpty == UCOL_PRIMARY) {
+ // I'm basically saying that I'll add this kind of contraction for cases where I combine
+ // one letter with an accent OR when I'm combining more than one symbol with a letter.
+ noteContraction("L", lesserToAddTo, lesserToAddToSize, firstRep[i], secondRep[j], noConts, status);
+ }
+ }
+ else if (probe.compare(trial, upper) > 0) { // trial is bigger than upper??
+ noteContraction("U", toAddTo, toAddToSize, firstRep[i], secondRep[j], noConts, status);
+ }
+#if 0
+ else if(firstRep[i]->strengthFromEmpty == UCOL_PRIMARY)
+ {
+ Line expansionLine;
+ if(getExpansionLine(trial, *firstRep[i], *secondRep[j], expansionLine) &&
+ expansionLine.len && !(expansionLine == *secondRep[j])) {
+ noteContraction("D", toAddTo, toAddToSize, firstRep[i], secondRep[j], noConts, status);
+ }
+ }
+#endif
+ else if(firstRep[i]->strengthFromEmpty == UCOL_PRIMARY && probe.getStrength(lower, trial) < secondRep[j]->strengthFromEmpty) {
+ noteContraction("D1", toAddTo, toAddToSize, firstRep[i], secondRep[j], noConts, status);
+ }
+ else if (firstRep[i]->strengthFromEmpty == UCOL_PRIMARY && secondRep[j]->strengthFromEmpty == UCOL_PRIMARY)
+ {
+ // I have added an additional check. The checks versus upper and lower bound should be sufficient
+ // when the right side is a combining mark. There might be a reordering of combining marks, but
+ // that should be already visible in their order.
+ // compare the sequence
+ // Y- <? Y <? Y+
+ // and
+ // XY- <? XY <? XY+
+ Line xym, xyp, xy;
+ UBool xymIsContraction = FALSE, toAddIsContraction = FALSE;
+ if(j) {
+ if(((!secondRep[j-1]->firstCC || firstRep[i]->lastCC < secondRep[j-1]->firstCC) && !frenchSecondary)
+ ||((!firstRep[i]->firstCC || firstRep[i]->firstCC > secondRep[j-1]->lastCC) && frenchSecondary)) {
+ xym.setToConcat(firstRep[i], secondRep[j-1]);
+ toAdd.strength = probe.getStrength(xym, toAdd);
+ if(secondRep[j]->strength != toAdd.strength) {
+ // there is possibility that either xym or xy are contractions
+ // There are two situations:
+ // xym > xy or xym <n xy and ym <k y but n != k
+ // if they are reordered, we are going to see if each of them
+ // is further reordered
+ if(toAdd.strength == UCOL_OFF) {
+ // check whether toAdd shifted more down
+ k = j - 2;
+ while(k>=0 && secondRep[k]->strength > secondRep[j]->strength) {
+ k--;
+ }
+ while(!toAddIsContraction && k>=0) {
+ xyp.setToConcat(firstRep[i], secondRep[k]);
+ if(contractionsTable->get(UnicodeString(xyp.name, xyp.len)) != NULL) {
+ k--;
+ continue;
+ }
+ if(probe.compare(xyp, xym) >= 0) {
+ // xyp looks like a contraction
+ noteContraction("!1", toAddTo, toAddToSize, firstRep[i], secondRep[j], noConts, status);
+ toAddIsContraction = TRUE;
+ } else {
+ break;
+ }
+ }
+ // first let's see if xym has moved beyond
+ if(contractionsTable->get(UnicodeString(xym.name, xym.len)) == NULL) {
+ k = j+1;
+ // ignore weaker strengths
+ while(k < secondSize && secondRep[k]->strength > secondRep[j]->strength) {
+ k++;
+ }
+ // check if we skipped the following guy
+ if(k < secondSize) {
+ xyp.setToConcat(firstRep[i], secondRep[k]);
+ if(probe.compare(xyp, xym) <= 0) {
+ // xyp looks like a contraction
+ noteContraction("!2", toAddTo, toAddToSize, firstRep[i], secondRep[j-1], noConts, status);
+ xymIsContraction = TRUE;
+ }
+ }
+ } else {
+ xymIsContraction = TRUE;
+ }
+ // if they have reordered, but none has moved, then we add them both
+ // and hope for the best
+ if(!xymIsContraction && !toAddIsContraction) {
+ // it is possible that there is an NFC version version of one of the
+ // strings. If we have XY > XZ, but NFC(XZ) = W and X < W, we might have
+ // have a false contraction.
+ trial.len = unorm_normalize(toAdd.name, toAdd.len, UNORM_NFC, 0, trial.name, 25, &status);
+ //UColAttributeValue strength = probe.getStrength(*firstRep[i], trial);
+ if(trial == toAdd) {
+ noteContraction("!3", toAddTo, toAddToSize, firstRep[i], secondRep[j-1], noConts, status);
+ noteContraction("!3", toAddTo, toAddToSize, firstRep[i], secondRep[j], noConts, status);
+ } else {
+ noteContraction("!4", toAddTo, toAddToSize, firstRep[i], secondRep[j], noConts, status);
+ }
+ }
+ } else { // only the strength has changed
+ // check whether the previous is contraction and if not, add the current
+ if(contractionsTable->get(UnicodeString(xym.name, xym.len)) == NULL) {
+ noteContraction("!5", toAddTo, toAddToSize, firstRep[i], secondRep[j], noConts, status);
+ }
+ }
+ }
+ }
+ }
+ }
+ if(thai) { // restore lower
+ lower = *firstRep[i];
+ }
+ }
+ }
+ return noConts;
+}
+
+void
+SortedLines::noteContraction(const char* msg, Line *toAddTo, int32_t &toAddToSize, Line *left, Line *right, int32_t &noConts, UErrorCode &status) // records left+right as a contraction unless it is already known; msg tags the debug output
+{
+ Line toAdd;
+ toAdd.setToConcat(left, right);
+ toAdd.left = left;
+ toAdd.right = right;
+ // if we're adding an accent to an existing contraction, we want to check
+#if 0
+ Line test, trial1, trial2;
+ if(right->strengthFromEmpty > UCOL_PRIMARY) {
+ if(left->right && left->right->previous && left->right->next) {
+ test.setToConcat(left->left, left->right->previous);
+ trial1.setToConcat(&test, right);
+
+ test.setToConcat(left->left, left->right->next);
+ trial2.setToConcat(&test, right);
+ if(probe.compare(trial1, toAdd) < 0 && probe.compare(toAdd, trial2) < 0) {
+ // this means that the contraction has been broken by the newly added accent
+ // so while 'ch' is contraction, 'ch'+dot_above sorts between 'cg'+dot_above and 'ci'+dot_above
+ debug->log("Con -");
+ debug->log(msg);
+ debug->log(toAdd.toString(FALSE), TRUE);
+ return;
+ }
+ } else {
+ if(right->previous && right->next) {
+ trial1.setToConcat(left, right->previous);
+ trial2.setToConcat(left, right->next);
+ if(probe.compare(trial1, toAdd) < 0 && probe.compare(toAdd, trial2) < 0) {
+ // this means that the contraction has been broken by the newly added accent
+ // so while 'ch' is contraction, 'ch'+dot_above sorts between 'cg'+dot_above and 'ci'+dot_above
+ debug->log("Con -");
+ debug->log(msg);
+ debug->log(toAdd.toString(FALSE), TRUE);
+ return;
+ }
+ }
+ if(left->previous && left->next) {
+ trial1.setToConcat(left->previous, right);
+ trial2.setToConcat(left->next, right);
+ if(probe.compare(trial1, toAdd) < 0 && probe.compare(toAdd, trial2) < 0) {
+ // this means that the contraction has been broken by the newly added accent
+ // so while 'ch' is contraction, 'ch'+dot_above sorts between 'cg'+dot_above and 'ci'+dot_above
+ debug->log("Con -");
+ debug->log(msg);
+ debug->log(toAdd.toString(FALSE), TRUE);
+ return;
+ }
+ }
+
+ }
+ }
+ if(right->right && right->right->strengthFromEmpty > UCOL_PRIMARY && right->left->previous && right->left->next) { // maybe we already had a contraction with an accent
+ test.setToConcat(right->left->previous, right->right);
+ trial1.setToConcat(left, &test);
+ test.setToConcat(right->left->next, right->right);
+ trial2.setToConcat(left, &test);
+ if(probe.compare(trial1, toAdd) < 0 && probe.compare(toAdd, trial2) < 0) {
+ // this means that the contraction has been broken by the newly added accent
+ // so while 'ch' is contraction, 'ch'+dot_above sorts between 'cg'+dot_above and 'ci'+dot_above
+ debug->log("Con -");
+ debug->log(msg);
+ debug->log(toAdd.toString(FALSE), TRUE);
+ return;
+ }
+ }
+#endif
+ if(contractionsTable->get(UnicodeString(toAdd.name, toAdd.len)) == NULL) {
+ if(probe.distanceFromEmptyString(toAdd) <= UCOL_TERTIARY) {
+ toAddTo[toAddToSize] = toAdd;
+ contractionsTable->put(UnicodeString(toAdd.name, toAdd.len), &toAddTo[toAddToSize++], status); // store the copy in toAddTo: the address of the local toAdd dangles once this function returns
+ noConts++;
+ debug->log(msg);
+ debug->log(" Con + ");
+ debug->log(toAdd.toString(FALSE), TRUE);
+
+ if(!left->sortKey) {
+ calculateSortKey(*left);
+ }
+ debug->log(left->dumpSortkey());
+ debug->log(" + ");
+
+ if(!right->sortKey) {
+ calculateSortKey(*right);
+ }
+ debug->log(right->dumpSortkey());
+ debug->log(" = ");
+
+ calculateSortKey(toAdd);
+ debug->log(toAdd.dumpSortkey(), TRUE);
+ if(noConts > size/2) { // too many contractions for this repertoire: signal the detection loops to stop
+ status = U_BUFFER_OVERFLOW_ERROR;
+ }
+ }
+ }
+}
+
+
+// Tries to find the expanding sequence for `expansion` relative to `previous`.
+//
+// Candidates are the entries of toSort whose strength is UCOL_PRIMARY. For each one,
+// previous + expansionLine + candidate is compared against `expansion`.
+//
+// expansion     - the line suspected to be an expansion
+// previous      - the line the expansion is anchored to; every trial starts with it
+// exp           - a line that is accepted as the result if it equals toSort[prevK]
+// expansionLine - output; cleared on entry and built up by this function
+//
+// Returns TRUE when a trial compared equal, when `exp` matched the last candidate that
+// still compared lower, or when at least one element was appended to expansionLine.
+UBool
+SortedLines::getExpansionLine(const Line &expansion, const Line &previous, const Line &exp, Line &expansionLine)
+{
+ int expIndexSize = 0;
+ // expStrength is assigned below but never read in this function.
+ UColAttributeValue expStrength = UCOL_OFF;
+ int32_t comparisonResult = 0;
+ int32_t i = 0, k = 0, prevK = 0;
+ Line trial;
+ UBool sequenceCompleted = FALSE;
+ int32_t expIndexes[256];
+ // expIndexesSize is unused; the fill level of expIndexes is tracked in expIndexSize.
+ int32_t expIndexesSize = 0;
+
+ // sequenceCompleted was just initialized to FALSE, so this branch is always taken.
+ if(!sequenceCompleted) {
+ expIndexSize = 0;
+ expansionLine.clear();
+
+ // we will start from strength between the expansion
+ // and the target (toSort[i] and toSort[j]. First we
+ // will add as many primaries as possible. Then we will
+ // try to add secondary pieces and then tertiary.
+ // found an expansion - what is the expanding sequence?
+
+ expStrength = UCOL_PRIMARY;
+ // Each pass of this loop scans the whole array once and appends at most one element.
+ while(!sequenceCompleted) {
+ k = 0;
+ prevK = 0;
+ while(k < size) {
+ // Give up once the sequence has grown past 15 code units.
+ if(expansionLine.len > 15) {
+ sequenceCompleted = TRUE;
+ break;
+ }
+ // Advance k to the next entry with primary strength.
+ while(k < size && toSort[k]->strength != UCOL_PRIMARY)
+ {
+ k++;
+ }
+ // nothing found
+ if(k == size) {
+ break;
+ }
+ // we need to skip over reordering things. If they were worthy, they would
+ // have been detected in the previous iteration.
+ //if(expansionLine.lastCC && toSort[k]->firstCC && expansionLine.lastCC > toSort[k]->firstCC) {
+ //k++;
+ //continue;
+ //}
+ trial = previous;
+ trial.append(expansionLine);
+ trial.append(*toSort[k]);
+ // Debugger anchor only; has no effect.
+ if(toSort[k]->name[0] == 0x0067) {
+ int32_t putBreakPointHere = 0;
+ }
+ comparisonResult = probe.compare(trial, expansion);
+ if(comparisonResult == 0) {
+ // Exact match: the result is this single element (anything built so far is replaced).
+ expansionLine = *toSort[k];
+ return TRUE;
+ } else if (comparisonResult > 0) {
+ // The trial overshot. prevK == 0 doubles as "no earlier primary candidate was seen".
+ if(prevK) {
+ if(exp == *toSort[prevK]) {
+ expansionLine = exp;
+ return TRUE;
+ }
+ // Walk the entries between prevK and k, skipping those weaker than exp,
+ // until one makes the trial compare greater than the expansion.
+ i = prevK;
+ while(i < k-1) {
+ i++;
+ if(toSort[i]->strength > exp.strength) {
+ continue;
+ }
+ trial = previous;
+ trial.append(expansionLine);
+ trial.append(*toSort[i]);
+ if(probe.compare(trial, expansion) > 0) {
+ break;
+ }
+ }
+ // NOTE(review): when prevK == k-1 the loop above does not run, so i == prevK and the
+ // element appended below is toSort[prevK-1] - confirm that this is intended.
+ // we got into situation where we have ch > ch+dot-below
+ // however, ch is a contraction and therefore we cannot use
+ // it properly. If we have hit on a contraction, we'll just try
+ // to continue. Probably need more logic here.
+ if(contractionsTable->get(UnicodeString(trial.name, trial.len)) == NULL) {
+ expansionLine.append(*toSort[i-1]);
+ expIndexes[expIndexSize++] = i-1;
+ break;
+ } else {
+ // Debugger anchor only; scanning continues with the next candidate.
+ int32_t putBreakPointHere = 0;
+ }
+ } else {
+ sequenceCompleted = TRUE;
+ break;
+ }
+ //break;
+ }
+ prevK = k;
+ k++;
+ }
+ // Stop when no candidate was seen in this pass or the scan ran off the end of the array.
+ if(!prevK || k == size) {
+ break;
+ }
+ }
+ }
+ return expIndexSize > 0;
+}
+
+// Refines an expanding sequence at the given strength so that
+// toSort[resetIndex] + expLine gets closer to toSort[expansionIndex].
+//
+// resetIndex     - index of the current reset point in toSort
+// expansionIndex - index of the expansion in toSort
+// expLine        - in/out; rebuilt from expIndexes when anything changed
+// expIndexes     - in/out; toSort indices of the elements making up expLine
+// expIndexSize   - in/out; number of valid entries in expIndexes
+// strength       - the strength level whose neighbours/ignorables are tried
+//
+// Returns the (possibly advanced) reset index.
+//
+// NOTE(review): the loops that walk toSort[k] below test only strength values and never
+// compare k against size - confirm that the data guarantees termination inside the array.
+int32_t
+SortedLines::gooseUp(int32_t resetIndex, int32_t expansionIndex, Line &expLine, int32_t *expIndexes, int32_t &expIndexSize, UColAttributeValue strength)
+{
+ int32_t i = expansionIndex, k = resetIndex+1, n = 0, m = 0, start = 0;
+ UBool haveChanges = FALSE;
+ Line trial, prefix, suffix;
+ // we will first try goosing up the reset index
+ //while(toSort[k]->strength >= strength)
+ // Advance over the entries right after the reset point that have exactly this strength,
+ // stopping at the first one that makes the trial compare greater than the expansion.
+ for( ; toSort[k]->strength == strength; k++)
+ {
+ //if(toSort[k]->strength > strength) {
+ //continue;
+ //}
+ trial.setToConcat(toSort[k], &expLine);
+ if(probe.compare(trial, *toSort[i]) > 0) {
+ break;
+ }
+ }
+ resetIndex = k-1;
+
+ // goose up individual characters
+ // For position n the trial is: prefix (reset point + elements before n) + candidate + suffix.
+ prefix = *toSort[resetIndex];
+ for(n = 0; n < expIndexSize; n++) {
+ suffix.clear();
+ for(m = n+1; m < expIndexSize; m++) {
+ suffix.append(*toSort[expIndexes[m]]);
+ }
+ k = expIndexes[n]+1;
+ //while(toSort[k]->strength >= strength)
+ for( ; toSort[k]->strength == strength; k++)
+ {
+ //if(toSort[k]->strength > strength) {
+ //continue;
+ //}
+ trial.setToConcat(&prefix, toSort[k]);
+ trial.append(suffix);
+ if(probe.compare(trial, *toSort[i]) > 0) {
+ break;
+ }
+ }
+ // k moved past its starting point: replace element n with the last entry that did not overshoot.
+ if(k > expIndexes[n]+1) {
+ haveChanges = TRUE;
+ expIndexes[n] = k-1;
+ }
+ prefix.append(*toSort[expIndexes[n]]);
+ }
+
+ // try inserting ignorables
+ UColAttributeValue lastStr = UCOL_OFF;
+ k = 0;
+ // Skip the leading entries whose strengthFromEmpty is numerically greater than `strength`.
+ while(toSort[k]->strengthFromEmpty > strength) {
+ k++;
+ }
+ if(toSort[k]->strengthFromEmpty == strength) {
+ start = k;
+ prefix = *toSort[resetIndex];
+ n = 0;
+ // n runs over every insertion position, including the one after the last element.
+ while(n <= expIndexSize) {
+ suffix.clear();
+ for(m = n; m < expIndexSize; m++) {
+ suffix.append(*toSort[expIndexes[m]]);
+ }
+ k = start;
+ while(toSort[k]->strengthFromEmpty == strength) {
+ trial.setToConcat(&prefix, toSort[k]);
+ trial.append(suffix);
+ lastStr = probe.getStrength(trial, *toSort[i]);
+ if(lastStr == UCOL_OFF) { // shot over - we won't find anything here
+ break;
+ } else if(lastStr > strength) {
+ // Shift the tail up by one slot and insert candidate k at position n.
+ // NOTE(review): expIndexSize is not checked against the capacity of expIndexes.
+ for(m = expIndexSize; m > n; m--) {
+ expIndexes[m] = expIndexes[m-1];
+ }
+ expIndexes[n] = k;
+ expIndexSize++;
+ haveChanges = TRUE;
+ break;
+ }
+#if 0
+ if(probe.compare(trial, *toSort[i]) > 0) {
+ // if the first one skips, that means that
+ // this position doesn't work
+ if(k > start) {
+ // insert an ignorable on position n
+ for(m = expIndexSize; m > n; m--) {
+ expIndexes[m] = expIndexes[m-1];
+ }
+ expIndexes[n] = k-1;
+ expIndexSize++;
+ haveChanges = TRUE;
+ if(n == expIndexSize-1) { // added to the end of the string
+ UColAttributeValue str = probe.getStrength(trial, *toSort[i]);
+ int32_t putBreakHere = 0;
+ }
+ }
+ break;
+ } else {
+ lastStr = probe.getStrength(trial, *toSort[i]);
+ }
+#endif
+ k++;
+ }
+ // NOTE(review): when n == expIndexSize and nothing was inserted at this position,
+ // expIndexes[n] is a slot beyond the valid entries - confirm this read is harmless.
+ prefix.append(*toSort[expIndexes[n]]);
+ n++;
+ }
+ }
+
+ // Rebuild the expansion line from the updated index list.
+ if(haveChanges) {
+ expLine.clear();
+ for(m = 0; m < expIndexSize; m++) {
+ expLine.append(*toSort[expIndexes[m]]);
+ }
+ }
+ return resetIndex;
+}
+
+// Scans the sorted array for elements that behave like expansions.
+//
+// For every expansion found, the element records its reset point (expIndex), the expanding
+// sequence (expansionString / expLen) and the relation strength (expStrength). Candidates
+// whose expanding sequence does not qualify are marked isRemoved and unlinked instead.
+// After the scan, the linked list is rewired so each expansion follows its reset point.
+//
+// Returns exCount: the number of elements for which an expanding sequence was computed.
+// Elements that merely inherit the expansion of an identical predecessor are not counted;
+// candidates that end up marked isRemoved are.
+int32_t
+SortedLines::detectExpansions()
+{
+ logger->log("\n*** Detecting expansions\n\n");
+ int32_t exCount = 0;
+ int32_t i = 0, j = 0, k = 0, prevK = 0;
+ Line *previous, trial, expansionLine;
+ UBool foundExp = FALSE, sequenceCompleted = FALSE;
+ UColAttributeValue strength = UCOL_OFF;
+ UColAttributeValue maxStrength = UCOL_IDENTICAL;
+ UColAttributeValue expStrength = UCOL_OFF;
+ int32_t expIndexes[256];
+ int32_t expIndexSize = 0;
+ memset(expIndexes, 0, sizeof(expIndexes));
+
+ // for each element, we look back to find whether there is such a q for which
+ // q <n x < qUBn. These are possible expansions. When going backwards we skip
+ // over already detected expansions.
+ i = 0;
+ // it turns out that looking at accents as possible expansions is
+ // quite a stupid thing to do, especially on non ICU platforms.
+ // Previously this line skipped over identicals only, but
+ // now we are going to skip all the way to non-ignorables.
+ // NOTE(review): this skip loop does not check i against size.
+ while(toSort[i]->strengthFromEmpty > UCOL_PRIMARY) {
+ i++;
+ }
+ i++;
+ for( ; i < size; i++) {
+ // Debugger anchor only; has no effect.
+ if(toSort[i]->name[0]==0x0063 && toSort[i]->name[1] == 0x68) // && toSort[i]->name[1] == 0x308)0043 0043 0219
+ {
+ int32_t putBreakpointhere = 0;
+ }
+ foundExp = FALSE;
+ sequenceCompleted = FALSE;
+ strength = toSort[i]->strength;
+ // An element identical to an expansion inherits that expansion's data unchanged.
+ if(strength == UCOL_IDENTICAL && toSort[i-1]->isExpansion == TRUE) {
+ u_strcpy(toSort[i]->expansionString, toSort[i-1]->expansionString);
+ toSort[i]->expLen = toSort[i-1]->expLen;
+ toSort[i]->isExpansion = TRUE;
+ toSort[i]->expIndex = toSort[i-1]->expIndex;
+ toSort[i]->expStrength = UCOL_IDENTICAL;
+ //toSort[i]->expStrength = toSort[i-1]->expStrength;
+ foundExp = TRUE;
+ sequenceCompleted = TRUE;
+ }
+ //logger->log("%i %i\n", i, j);
+ // Look backwards for a reset point q such that q + upper bound sorts after toSort[i].
+ while(!foundExp && strength <= maxStrength) {
+ j = i-1;
+ // Skip over elements already classified as expansions or removed.
+ while(j && (toSort[j]->isExpansion == TRUE || toSort[j]->isRemoved == TRUE)) {
+ //if(toSort[j]->strength < strength) {
+ //strength = toSort[j]->strength;
+ //}
+ j--;
+ }
+
+ //while(j && toSort[j]->strength > strength)
+ while(j && toSort[j]->strength > probe.getStrength(*toSort[j], *toSort[i]))
+ {
+ j--;
+ }
+ //if(toSort[j]->strength == strength) {
+ previous = toSort[j];
+ // Abandon the search for this element when the candidate reset point is unusable.
+ if(previous->strengthFromEmpty >= UCOL_IDENTICAL ||
+ (previous->strengthFromEmpty == UCOL_SECONDARY
+ && strength == UCOL_SECONDARY
+ && previous->lastCC > UB[strength]->firstCC)) {
+ break;
+ //continue;
+ }
+ //trial.setToConcat(previous, UB[strength]);
+ trial.setToConcat(previous, UB[probe.getStrength(*toSort[j], *toSort[i])]);
+ if(probe.compare(trial, *toSort[i]) > 0) {
+ foundExp = TRUE;
+ }
+ //}
+ // Step to the next weaker strength; UCOL_QUATERNARY jumps straight to UCOL_IDENTICAL.
+ if(strength == UCOL_QUATERNARY) {
+ strength = UCOL_IDENTICAL;
+ } else {
+ strength = (UColAttributeValue)(strength + 1);
+ }
+ }
+ // calculate the expanding sequence
+ if(foundExp && !sequenceCompleted) {
+ expIndexSize = 0;
+ expansionLine.clear();
+ exCount++;
+ // we will start from strength between the expansion
+ // and the target (toSort[i] and toSort[j]. First we
+ // will add as many primaries as possible. Then we will
+ // try to add secondary pieces and then tertiary.
+ // found an expansion - what is the expanding sequence?
+
+ expStrength = UCOL_PRIMARY;
+ // Each pass scans the whole array once and appends at most one primary element.
+ while(!sequenceCompleted) {
+ k = 0;
+ prevK = 0;
+ while(k < size) {
+ // Give up once the sequence has grown past 15 code units.
+ if(expansionLine.len > 15) {
+ sequenceCompleted = TRUE;
+ break;
+ }
+ while(k < size && toSort[k]->strength != UCOL_PRIMARY) {
+ k++;
+ }
+ // nothing found
+ if(k == size) {
+ break;
+ }
+ // we need to skip over reordering things. If they were worthy, they would
+ // have been detected in the previous iteration.
+ //if(expansionLine.lastCC && toSort[k]->firstCC && expansionLine.lastCC > toSort[k]->firstCC) {
+ //k++;
+ //continue;
+ //}
+ trial = *previous;
+ trial.append(expansionLine);
+ trial.append(*toSort[k]);
+ // Debugger anchor only; has no effect.
+ if(toSort[k]->name[0] == 0x0067) {
+ int32_t putBreakPointHere = 0;
+ }
+ if(probe.compare(trial, *toSort[i]) > 0) {
+ // The trial overshot. prevK == 0 doubles as "no earlier primary candidate was seen".
+ if(prevK) {
+ // we got into situation where we have ch > ch+dot-below
+ // however, ch is a contraction and therefore we cannot use
+ // it properly. If we have hit on a contraction, we'll just try
+ // to continue. Probably need more logic here.
+ if(contractionsTable->get(UnicodeString(trial.name, trial.len)) == NULL) {
+ expansionLine.append(*toSort[prevK]);
+ expIndexes[expIndexSize++] = prevK;
+ break;
+ } else {
+ int32_t putBreakPointHere = 0;
+ }
+ } else {
+ sequenceCompleted = TRUE;
+ break;
+ }
+ //break;
+ }
+ prevK = k;
+ k++;
+ }
+ if(!prevK || k == size) {
+ break;
+ }
+ }
+ // after this we have primaries lined up.
+ // we are going to goose up with secondaries and
+ // tertiaries
+ trial.setToConcat(toSort[j], &expansionLine);
+ expStrength = probe.getStrength(trial, *toSort[i]);
+ if(expStrength > UCOL_PRIMARY) {
+ if(expStrength == UCOL_SECONDARY || expStrength == UCOL_OFF) {
+ j = gooseUp(j, i, expansionLine, expIndexes, expIndexSize, UCOL_SECONDARY);
+ trial.setToConcat(toSort[j], &expansionLine);
+ expStrength = probe.getStrength(trial, *toSort[i]);
+ if(expStrength == UCOL_TERTIARY) {
+ j = gooseUp(j, i, expansionLine, expIndexes, expIndexSize, UCOL_TERTIARY);
+ }
+ } else if(expStrength == UCOL_TERTIARY) {
+ j = gooseUp(j, i, expansionLine, expIndexes, expIndexSize, UCOL_TERTIARY);
+ }
+ }
+ // Re-measure the final strength between reset point + sequence and the expansion.
+ trial.setToConcat(toSort[j], &expansionLine);
+ expStrength = probe.getStrength(trial, *toSort[i]);
+ if(expansionLine.len) {
+ // Debugger anchor only; has no effect.
+ if(expansionLine.name[0] == 0x73 && expansionLine.name[1] == 0x7a) {
+ int32_t putBreakpointhere = 0;
+ }
+ UBool isExpansionLineAContraction = (contractionsTable->get(UnicodeString(expansionLine.name, expansionLine.len)) != NULL);
+ // we have an expansion line and an expansion. There could be some expansions where
+ // the difference between expansion line and the end of expansion sequence is less or
+ // equal than the expansion strength. These should probably be removed.
+ int32_t diffLen = toSort[i]->len - expansionLine.len;
+ // Compare the expansion line against the tail of toSort[i] of the same length,
+ // or against the whole of toSort[i] when it is not longer than the expansion line.
+ if(diffLen > 0) {
+ trial.setTo(UnicodeString(toSort[i]->name + diffLen, toSort[i]->len - diffLen));
+ } else {
+ trial = *toSort[i];
+ }
+ UColAttributeValue s1 = probe.getStrength(trial, expansionLine);
+ if(s1 == UCOL_OFF) {
+ s1 = probe.getStrength(expansionLine, trial);
+ }
+ if((!isExpansionLineAContraction && s1 >= expStrength) || (diffLen <= 0 && s1 == UCOL_IDENTICAL)) {
+ // Not a useful expansion: drop the element from the contraction table and the linked list.
+ // The relinking below only happens when the element has both neighbours.
+ contractionsTable->remove(UnicodeString(toSort[i]->name, toSort[i]->len));
+ toSort[i]->isRemoved = TRUE;
+ if(toSort[i]->next && toSort[i]->previous) {
+ toSort[i]->previous->next = toSort[i]->next;
+ }
+ if(toSort[i]->previous && toSort[i]->next) {
+ toSort[i]->next->previous = toSort[i]->previous;
+ }
+ debug->log("Exp -N: ");
+ debug->log(toSort[i]->toString(FALSE));
+ debug->log(" / ");
+ debug->log(expansionLine.toString(FALSE), TRUE);
+ }
+ else
+ {
+ // NOTE(review): u_strncat appends to whatever expansionString already holds, while the
+ // terminator below is written at expLen - confirm expansionString is empty at this point.
+ u_strncat(toSort[i]->expansionString, expansionLine.name, expansionLine.len);
+ toSort[i]->isExpansion = TRUE;
+ toSort[i]->expStrength = expStrength;
+ toSort[i]->expLen = expansionLine.len;
+ toSort[i]->expansionString[toSort[i]->expLen] = 0;
+ toSort[i]->expIndex = j;
+ }
+ }
+ }
+ if(toSort[i]->isExpansion == TRUE) {
+ if(debug->isOn()) {
+ // NOTE(review): for elements that inherited an expansion at the top of the loop, j and
+ // expansionLine still hold values from an earlier iteration - confirm the log is meant to show them.
+ debug->log("Exp + : &");
+ debug->log(toSort[j]->toString(FALSE));
+ debug->log(toSort[i]->strengthToString(toSort[i]->expStrength, TRUE));
+ debug->log(toSort[i]->toString(FALSE));
+ debug->log(" ");
+ if(!toSort[j]->sortKey) {
+ calculateSortKey(*toSort[j]);
+ }
+ debug->log(toSort[j]->dumpSortkey());
+ debug->log(" ... ");
+ if(!toSort[i]->sortKey) {
+ calculateSortKey(*toSort[i]);
+ }
+ debug->log(toSort[i]->dumpSortkey());
+ calculateSortKey(expansionLine);
+ debug->log("/");
+ debug->log(expansionLine.dumpSortkey(), TRUE);
+ }
+
+ }
+ }
+ // after detecting expansions, we want to position them.
+ // it is better to position expansions after all have been detected,
+ // since otherwise we will change the ordering.
+ for(i = size-1; i >= 0; i--) {
+ if(toSort[i]->isExpansion) {
+ // Debugger anchor only; has no effect.
+ if(toSort[i]->name[0] == 0x2A3) {
+ int32_t putBreakPointHere = 0;
+ }
+ // Unlink the expansion from its current position ...
+ if(i) {
+ if(toSort[i]->previous) {
+ toSort[i]->previous->next = toSort[i]->next;
+ }
+ }
+ if(i < size-1) {
+ if(toSort[i]->next) {
+ toSort[i]->next->previous = toSort[i]->previous;
+ }
+ }
+ // ... and splice it in directly after its reset point.
+ j = toSort[i]->expIndex;
+ toSort[i]->next = toSort[j]->next;
+ toSort[i]->previous = toSort[j];
+ toSort[j]->next = toSort[i];
+ if(toSort[i]->next) {
+ toSort[i]->next->previous = toSort[i];
+ }
+ // From here on the element's strength is the strength of the expansion relation.
+ toSort[i]->strength = toSort[i]->expStrength;
+ }
+ }
+ return exCount;
+}
+
+
+// Moves the traversal cursor to the head of the linked list and returns it.
+Line *
+SortedLines::getFirst() {
+    return current = first;
+}
+
+// Moves the traversal cursor to the tail of the linked list and returns it.
+Line *
+SortedLines::getLast() {
+    return current = last;
+}
+
+// Copies `line` into the next free slot of the backing array and, when linkIn is
+// set, inserts the copy into the linked list in sorted order.
+//
+// line   - the line to copy; the caller keeps ownership of the original
+// linkIn - when TRUE the copy is linked in before the first element that does not
+//          compare lower than it, or appended when no such element exists
+//
+// The element is dropped (with a debug message) when the backing array is full:
+// `lines` cannot be reallocated here because toSort and the next/previous links
+// hold raw pointers into it.
+// NOTE(review): toSort is not extended by this function - confirm that callers
+// rebuild the sorting array after adding lines.
+void
+SortedLines::add(Line *line, UBool linkIn) {
+    if(size >= capacity) {
+        debug->log("SortedLines::add: capacity exhausted, line not added\n");
+        return;
+    }
+    // Store into the first unused slot, then count it. (The previous code incremented
+    // size first, which skipped a slot and could write one element past the array.)
+    Line *toAdd = &lines[size];
+    *toAdd = *line;
+    size++;
+
+    if(!linkIn) {
+        return;
+    }
+
+    // Find the first element that does not sort before the new line.
+    // On an empty list the cursor starts out NULL and the line becomes both head and tail.
+    Line *cursor = first;
+    while(cursor != NULL && probe.comparer(&cursor, &toAdd) < 0) {
+        cursor = cursor->next;
+    }
+
+    if(cursor == NULL) {
+        // Append at the tail.
+        toAdd->previous = last;
+        toAdd->next = NULL;
+        if(last != NULL) {
+            last->next = toAdd;
+        }
+        last = toAdd;
+        if(first == NULL) {
+            first = toAdd;
+        }
+    } else {
+        // Insert directly in front of the cursor.
+        toAdd->next = cursor;
+        toAdd->previous = cursor->previous;
+        if(cursor->previous) {
+            cursor->previous->next = toAdd;
+        } else {
+            first = toAdd;
+        }
+        cursor->previous = toAdd;
+    }
+}
+
+
+// Advances the traversal cursor by one link and returns the new position.
+// Returns NULL once the cursor has run off the end (or was never set).
+Line *
+SortedLines::getNext()
+{
+    if(current == NULL) {
+        return NULL;
+    }
+    current = current->next;
+    return current;
+}
+
+// Moves the traversal cursor back by one link and returns the new position.
+// Returns NULL once the cursor has run off the front (or was never set).
+Line *
+SortedLines::getPrevious()
+{
+    if(current == NULL) {
+        return NULL;
+    }
+    current = current->previous;
+    return current;
+}
+
+// Returns the element reached by following `index` next-links from the head of the
+// linked list, or NULL when the list is shorter than that. A non-positive index
+// yields the head itself.
+Line *
+SortedLines::operator[](int32_t index)
+{
+    Line *cursor = first;
+    for(int32_t hops = index; hops > 0 && cursor != NULL; --hops) {
+        cursor = cursor->next;
+    }
+    return cursor;
+}
+
+// Renders a sequence of lines as rule text.
+//
+// sortedLines   - array of lines; element 0 is always the starting point
+// linesSize     - number of elements to visit when walking the array
+// pretty        - forwarded to the per-line formatters; also adds a newline per line and
+//                 a " # name" annotation after resets
+// useLinks      - TRUE: follow next-links from element 0; FALSE: walk the array by index
+// printSortKeys - append each line's sort key dump; ignored while no key buffer exists
+//
+// Resets are written as '&' plus the raw name, removed lines are skipped, and every other
+// line after the first is preceded by its strength operator.
+UnicodeString
+SortedLines::arrayToString(Line** sortedLines, int32_t linesSize, UBool pretty, UBool useLinks, UBool printSortKeys) {
+    UnicodeString out;
+    // Sort keys can only be dumped once the shared key buffer has been allocated.
+    const UBool dumpKeys = (UBool)(printSortKeys && sortkeys);
+
+    // The head element gets no strength operator and never has its sort key dumped.
+    Line *cursor = sortedLines[0];
+    if(cursor->isReset) {
+        out.append(" & ");
+        out.append(cursor->name, cursor->len);
+        if(pretty) {
+            out.append(" # ");
+            out.append(cursor->stringToName(cursor->name, cursor->len));
+            out.append("\n");
+        }
+    } else if(!cursor->isRemoved) {
+        out.append(cursor->toString(pretty));
+        if(pretty) {
+            out.append("\n");
+        }
+    }
+
+    for(int32_t index = 1; ; index++) {
+        // Pick the successor either from the link or from the array, stopping when exhausted.
+        Line *following = NULL;
+        if(useLinks) {
+            following = cursor->next;
+            if(following == NULL) {
+                break;
+            }
+        } else {
+            if(index >= linesSize) {
+                break;
+            }
+            following = sortedLines[index];
+        }
+
+        if(following->isReset) {
+            out.append(" &");
+            out.append(following->name, following->len);
+            if(pretty) {
+                out.append(" # ");
+                out.append(following->stringToName(following->name, following->len));
+                out.append("\n");
+            }
+        } else if(!following->isRemoved) {
+            // index starts at 1, so the strength operator is always written here.
+            out.append(following->strengthToString(following->strength, pretty));
+            out.append(following->toString(pretty));
+            if(dumpKeys) {
+                out.append(following->dumpSortkey());
+            }
+            if(pretty) {
+                out.append("\n");
+            }
+        }
+        cursor = following;
+    }
+    return out;
+}
+
+// Loads a previously dumped SortedLines structure from `file`.
+//
+// The first line of the dump holds "size,frenchSecondary,upperFirst"; every following line
+// describes one Line and is parsed by Line::initFromString. Lines are linked to their
+// neighbours in file order.
+//
+// file   - open stream positioned at the start of the dump
+// logger - progress output
+// debug  - debug output
+// status - set to U_INVALID_FORMAT_ERROR when the header cannot be parsed and to
+//          U_BUFFER_OVERFLOW_ERROR when the file holds more lines than the header announced
+//          (the surplus lines are ignored); may also be set by Line::initFromString
+SortedLines::SortedLines(FILE *file, UPrinter *logger, UPrinter *debug, UErrorCode &status) :
+toSort(NULL),
+toSortCapacity(0),
+lines(NULL),
+size(0),
+capacity(0),
+first(NULL),
+last(NULL),
+current(NULL),
+logger(logger),
+debug(debug),
+contractionsTable(NULL),
+duplicators(NULL),
+maxExpansionPrefixSize(0),
+wordSort(FALSE),
+frenchSecondary(FALSE),
+upperFirst(FALSE),
+sortkeys(NULL),
+sortkeyOffset(0)
+{
+    debug->log("*** loading a dump\n");
+    // Every upper-bound slot initially points at the empty line.
+    memset(UB, 0, sizeof(UB));
+    int32_t i = 0;
+    for(i = 0; i < UCOL_OFF; i++) {
+        UB[i] = &empty;
+    }
+
+    int32_t newFrench = 0, newUpperFirst = 0;
+    if(fscanf(file, "%i,%i,%i\n", &size, &newFrench, &newUpperFirst) != 3 || size < 0) {
+        // Unusable header: continue with an empty structure instead of garbage values.
+        size = 0;
+        newFrench = 0;
+        newUpperFirst = 0;
+        status = U_INVALID_FORMAT_ERROR;
+    }
+    debug->log("Read size %i, frenchSecondary %i and upperFirst %i\n", size, newFrench, newUpperFirst);
+    frenchSecondary = (UBool)newFrench;
+    upperFirst = (UBool)newUpperFirst;
+    capacity = size;
+    lines = new Line[capacity];
+    i = 0;
+
+    char buff[256];
+
+    // Never read more lines than were allocated, whatever the file contains.
+    while(i < capacity && fgets(buff, 256, file)) {
+        if(i % 20 == 0) {
+            logger->log("\rLine: %04i", i, buff);
+        }
+        lines[i].initFromString(buff, 256, status);
+        if(i) {
+            lines[i].previous = &lines[i-1];
+            lines[i-1].next = &lines[i];
+        }
+        i++;
+    }
+    if(i == capacity && U_SUCCESS(status) && fgets(buff, 256, file)) {
+        // The file holds more data than the header announced.
+        status = U_BUFFER_OVERFLOW_ERROR;
+    }
+    // The number of lines actually read wins over the announced size.
+    size = i;
+    toSort = new Line*[size];
+    setSortingArray(toSort, lines, size);
+    if(size > 0) {
+        first = &lines[0];
+        last = &lines[size-1];
+    }
+}
+
+// Dumps this structure to `file` in the format read back by the FILE* constructor:
+// a "size,frenchSecondary,upperFirst" header followed by one serialized Line per row.
+//
+// file     - open stream to write to
+// useLinks - TRUE: follow the next-links starting at toSort[0];
+//            FALSE: write toSort[0..size-1] in array order
+// status   - passed through to Line::write
+//
+// In array mode exactly `size` rows are written. (The previous loop used the next-links
+// as its stop condition in both modes, which could truncate the output or index past the
+// end of toSort once links and array order differ.)
+void
+SortedLines::toFile(FILE *file, UBool useLinks, UErrorCode &status)
+{
+    fprintf(file, "%i,%i,%i\n", size, frenchSecondary, upperFirst);
+    if(size <= 0) {
+        fflush(file);
+        return;
+    }
+
+    char buff[256];
+    int32_t i = 0;
+    Line *line = toSort[0];
+    while(line != NULL) {
+        line->write(buff, 256, status);
+        fprintf(file, "%s\n", buff);
+        i++;
+        if(useLinks) {
+            line = line->next;
+        } else {
+            line = (i < size) ? toSort[i] : NULL;
+        }
+    }
+    fflush(file);
+}
+
+
+
+// Renders toSort[0..size-1] in array order, using each line's strength relative to the
+// empty string (strengthFromEmpty) as its operator.
+//
+// Resets are written as '&' plus the raw name and removed lines are skipped. The output is
+// always the compact (non-pretty) form. Returns an empty string when there are no lines.
+//
+// The walk is bounded by `size` alone; the previous condition also continued while the
+// last visited line had a next-link, which read toSort past its end.
+UnicodeString
+SortedLines::toStringFromEmpty() {
+    UnicodeString result;
+    if(size <= 0) {
+        return result;
+    }
+
+    // The head element gets no strength operator.
+    Line *line = toSort[0];
+    if(line->isReset) {
+        result.append(" & ");
+        result.append(line->name, line->len);
+    } else if(!line->isRemoved) {
+        result.append(line->toString(FALSE));
+    }
+
+    for(int32_t i = 1; i < size; i++) {
+        line = toSort[i];
+        if(line->isReset) {
+            result.append(" &");
+            result.append(line->name, line->len);
+        } else if(!line->isRemoved) {
+            result.append(line->strengthToString(line->strengthFromEmpty, FALSE));
+            result.append(line->toString(FALSE));
+        }
+    }
+    return result;
+}
+
+// Compact rule text for the whole sorting array: no pretty-printing, no sort keys.
+UnicodeString
+SortedLines::toString(UBool useLinks)
+{
+    const UBool pretty = FALSE;
+    const UBool printSortKeys = FALSE;
+    return arrayToString(toSort, size, pretty, useLinks, printSortKeys);
+}
+
+
+// Pretty-printed rule text for the whole sorting array, optionally with sort key dumps.
+UnicodeString
+SortedLines::toPrettyString(UBool useLinks, UBool printSortKeys)
+{
+    const UBool pretty = TRUE;
+    return arrayToString(toSort, size, pretty, useLinks, printSortKeys);
+}
+
+// Dispatches to the serializer selected by `format`: "HTML" and "XML" (exact, case-sensitive
+// matches) pick toHTML and toXML; any other value falls back to the resource bundle form.
+// All remaining arguments are forwarded unchanged.
+UnicodeString
+SortedLines::toOutput(const char *format,
+                      const char *locale, const char *platform, const char *reference,
+                      UBool useLinks, UBool initialize, UBool moreToCome) {
+    if(strcmp(format, "HTML") == 0) {
+        return toHTML(locale, platform, reference, useLinks, initialize, moreToCome);
+    }
+    if(strcmp(format, "XML") == 0) {
+        return toXML(locale, platform, reference, useLinks, initialize, moreToCome);
+    }
+    return toBundle(locale, platform, reference, useLinks, initialize, moreToCome);
+}
+
+
+// Renders the collation rules as an HTML page fragment.
+//
+// locale, platform, reference - names written into the header (only when initialize is set)
+// useLinks   - TRUE: follow next-links from toSort[0]; FALSE: walk toSort[0..size-1]
+// initialize - emit the document header and the opening of the bundle structure
+// moreToCome - when FALSE, emit the closing braces and the closing html tag
+//
+// Resets are always written; other lines are written unless marked removed.
+//
+// In array mode the walk is bounded by `size`. The previous loop fetched toSort[i] after
+// the last element and dereferenced it while evaluating its stop condition.
+UnicodeString
+SortedLines::toHTML(const char *locale,
+                    const char *platform, const char *reference,
+                    UBool useLinks, UBool initialize, UBool moreToCome)
+{
+    UnicodeString result;
+    if(initialize) {
+        result.append("<html>\n<head>\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\n</head>\n");
+        result.append("# Collation data resource bundle generated for locale: ");
+        result.append(locale);
+        result.append("<br>\n# For platform ");
+        result.append(platform);
+        result.append(" reference platform ");
+        result.append(reference);
+        result.append("<br><br>\n\n\n");
+
+        result.append(locale);
+        if(platform) {
+            result.append("_");
+            result.append(platform);
+        }
+        if(reference) {
+            result.append("_vs_");
+            result.append(reference);
+        }
+        result.append(" {<br>\n");
+
+        result.append(" collations {<br>\n standard {<br>\n Sequence {<br>\n");
+    }
+
+    if(frenchSecondary) {
+        result.append("[backwards 2]<br>\n");
+    }
+    if(upperFirst) {
+        result.append("[casefirst upper]<br>\n");
+    }
+
+    if(useLinks) {
+        // NOTE(review): as before, the walk stops at the line whose next-link is NULL without
+        // emitting it - confirm that skipping the final linked line is intended.
+        Line *line = (size > 0) ? toSort[0] : NULL;
+        while(line != NULL && line->next != NULL) {
+            if(line->isReset || !line->isRemoved) {
+                result.append(line->toHTMLString());
+            }
+            line = line->next;
+        }
+    } else {
+        for(int32_t i = 0; i < size; i++) {
+            Line *line = toSort[i];
+            if(line->isReset || !line->isRemoved) {
+                result.append(line->toHTMLString());
+            }
+        }
+    }
+
+    if(!moreToCome) {
+        result.append(" }<br>\n }<br>\n }<br>\n}<br>\n");
+
+        result.append("</html>\n");
+    }
+
+    return result;
+}
+
+// Renders the collation rules for the "XML" output format.
+// NOTE(review): the generated text is HTML markup built from Line::toHTMLString, exactly
+// as before - confirm whether a real XML serialization was planned here.
+//
+// locale, platform, reference - names written into the header (only when initialize is set)
+// useLinks   - TRUE: follow next-links from toSort[0]; FALSE: walk toSort[0..size-1]
+// initialize - emit the document header and the opening of the bundle structure
+// moreToCome - when FALSE, emit the closing braces and the closing html tag
+//
+// Resets are always written; other lines are written unless marked removed.
+//
+// In array mode the walk is bounded by `size`. The previous loop fetched toSort[i] after
+// the last element and dereferenced it while evaluating its stop condition.
+UnicodeString
+SortedLines::toXML(const char *locale,
+                   const char *platform, const char *reference,
+                   UBool useLinks, UBool initialize, UBool moreToCome)
+{
+    UnicodeString result;
+    if(initialize) {
+        result.append("<html>\n<head>\n<meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\">\n</head>\n");
+        result.append("# Collation data resource bundle generated for locale: ");
+        result.append(locale);
+        result.append("<br>\n# For platform ");
+        result.append(platform);
+        result.append(" reference platform ");
+        result.append(reference);
+        result.append("<br><br>\n\n\n");
+
+        result.append(locale);
+        if(platform) {
+            result.append("_");
+            result.append(platform);
+        }
+        if(reference) {
+            result.append("_vs_");
+            result.append(reference);
+        }
+        result.append(" {<br>\n");
+
+        result.append(" collations {<br>\n standard {<br>\n Sequence {<br>\n");
+    }
+
+    if(frenchSecondary) {
+        result.append("[backwards 2]<br>\n");
+    }
+    if(upperFirst) {
+        result.append("[casefirst upper]<br>\n");
+    }
+
+    if(useLinks) {
+        // NOTE(review): as before, the walk stops at the line whose next-link is NULL without
+        // emitting it - confirm that skipping the final linked line is intended.
+        Line *line = (size > 0) ? toSort[0] : NULL;
+        while(line != NULL && line->next != NULL) {
+            if(line->isReset || !line->isRemoved) {
+                result.append(line->toHTMLString());
+            }
+            line = line->next;
+        }
+    } else {
+        for(int32_t i = 0; i < size; i++) {
+            Line *line = toSort[i];
+            if(line->isReset || !line->isRemoved) {
+                result.append(line->toHTMLString());
+            }
+        }
+    }
+
+    if(!moreToCome) {
+        result.append(" }<br>\n }<br>\n }<br>\n}<br>\n");
+
+        result.append("</html>\n");
+    }
+
+    return result;
+}
+
+// Renders the collation rules as resource bundle source text.
+//
+// locale, platform, reference - names written into the header comment (only when
+//              initialize is set); the bundle itself is named after the locale alone
+// useLinks   - TRUE: follow next-links from toSort[0]; FALSE: walk toSort[0..size-1]
+// initialize - emit the header comment and the opening of the bundle structure
+// moreToCome - when FALSE, emit the closing braces
+//
+// Resets are always written; other lines are written unless marked removed.
+//
+// In array mode the walk is bounded by `size`. The previous loop fetched toSort[i] after
+// the last element and dereferenced it while evaluating its stop condition.
+UnicodeString
+SortedLines::toBundle(const char *locale,
+                      const char *platform, const char *reference,
+                      UBool useLinks, UBool initialize, UBool moreToCome)
+{
+    UnicodeString result;
+
+    if(initialize) {
+        result.append("// Collation data resource bundle generated for locale: ");
+        result.append(locale);
+        result.append("\n// For platform ");
+        result.append(platform);
+        result.append(" reference platform ");
+        result.append(reference);
+        result.append("\n\n\n");
+
+        result.append(locale);
+        /*
+        if(platform) {
+            result.append("_");
+            result.append(platform);
+        }
+        if(reference) {
+            result.append("_vs_");
+            result.append(reference);
+        }
+        */
+        result.append(" {\n");
+
+        result.append(" collations {\n standard {\n Sequence {\n");
+    }
+
+    if(frenchSecondary) {
+        result.append("[backwards 2]\n");
+    }
+    if(upperFirst) {
+        result.append("[casefirst upper]\n");
+    }
+
+    if(useLinks) {
+        // NOTE(review): as before, the walk stops at the line whose next-link is NULL without
+        // emitting it - confirm that skipping the final linked line is intended.
+        Line *line = (size > 0) ? toSort[0] : NULL;
+        while(line != NULL && line->next != NULL) {
+            if(line->isReset || !line->isRemoved) {
+                result.append(line->toBundleString());
+            }
+            line = line->next;
+        }
+    } else {
+        for(int32_t i = 0; i < size; i++) {
+            Line *line = toSort[i];
+            if(line->isReset || !line->isRemoved) {
+                result.append(line->toBundleString());
+            }
+        }
+    }
+
+    if(!moreToCome) {
+        result.append(" }\n }\n }\n}\n");
+    }
+
+    return result;
+}
+
+
+// Returns the size of the repertoire set. Note that this is not the `size` member,
+// which counts the entries of the sorting array.
+int32_t
+SortedLines::getSize() const {
+    const int32_t repertoireSize = repertoire.size();
+    return repertoireSize;
+}
+
+// Reduces this (tailored) ordering to its differences from `reference`.
+//
+// Both structures are walked along their linked lists. Lines that match the reference are
+// marked isRemoved; for each differing run, the last matching line before it is turned into
+// a reset (isReset = TRUE, isRemoved = FALSE) unless every line in the run could be removed.
+//
+// reference - the ordering to compare against; cumulativeStrength of its lines is modified
+//
+// NOTE(review): both lists are assumed to be non-empty - getFirst() results are dereferenced
+// without a check.
+void
+SortedLines::reduceDifference(SortedLines& reference) {
+ UErrorCode status = U_ZERO_ERROR;
+ if(upperFirst) {
+ swapCase();
+ }
+ // both sorted lines structures need to have established links and strengths
+ // We walk down both structures and note differences. These
+ // differences will modify this by removing elements, setting resets
+ // etc...
+ // we will prefer insertions from tailoring to reference, then deletions
+ // there are two tables that keep seen elements.
+ Hashtable *seenThis = new Hashtable();
+ Hashtable *seenReference = new Hashtable();
+
+
+ UBool found = FALSE;
+ UBool finished = FALSE;
+ // Maximum number of not-yet-seen tailoring lines skipped while looking for a match.
+ const int32_t lookForward = 20;
+ int32_t tailoringMove = 0;
+ //int32_t referenceSize = reference.getSize();
+ Line *refLine = reference.getFirst();
+ Line *refLatestEqual = refLine;
+ refLine = refLine->next;
+ Line *myLine = getFirst();
+ Line *myLatestEqual = myLine;
+ // The first lines of both lists are treated as equal without being compared.
+ myLatestEqual->isRemoved = TRUE;
+ myLine = myLine->next;
+ while(myLine && refLine) {
+ found = FALSE;
+ // Consume the run of lines that are equal in both lists, marking them removed.
+ while(myLine && refLine && myLine->equals(*refLine)) {
+ myLatestEqual = myLine;
+ myLatestEqual->isRemoved = TRUE;
+ myLine = myLine->next;
+ refLatestEqual = refLine;
+ refLine = refLine->next;
+ if(refLine == NULL && myLine == NULL) {
+ finished = TRUE;
+ }
+ }
+ // Start tracking cumulative strengths from the first differing line on each side.
+ if(myLine) {
+ myLine->cumulativeStrength = myLine->strength;
+ }
+ if(refLine) {
+ refLine->cumulativeStrength = refLine->strength;
+ }
+
+ // here is the difference
+ while(!found && !finished) {
+ tailoringMove = 0;
+ if(myLine && refLine) {
+ if(myLine->cumulativeStrength > refLine->cumulativeStrength) {
+ // tailoring z <<< x, UCA z < y
+ while(myLine->cumulativeStrength > refLine->cumulativeStrength) {
+ myLine = myLine->next;
+ if(myLine) {
+ transferCumulativeStrength(myLine->previous, myLine);
+ } else {
+ break;
+ }
+ }
+ } else if(myLine->cumulativeStrength < refLine->cumulativeStrength) {
+ // tailoring z < x, UCA z <<< y
+ while(myLine->cumulativeStrength < refLine->cumulativeStrength) {
+ seenReference->put(UnicodeString(refLine->name, refLine->len), refLine, status);
+ refLine = refLine->next;
+ if(refLine) {
+ transferCumulativeStrength(refLine->previous, refLine);
+ } else {
+ break;
+ }
+ }
+ }
+ // this is the interesting point. Now we search for character match
+ while(myLine && refLine && (!myLine->equals(*refLine) || myLine->strength == UCOL_IDENTICAL)
+ && tailoringMove < lookForward) {
+ if(seenThis->get(UnicodeString(refLine->name, refLine->len))) {
+ // we are not interested in stuff from the reference that is already accounted
+ // for in the tailoring.
+ refLine = refLine->next;
+ if(refLine) {
+ transferCumulativeStrength(refLine->previous, refLine);
+ }
+ } else {
+ myLine = myLine->next;
+ if(myLine) {
+ transferCumulativeStrength(myLine->previous, myLine);
+ // Only tailoring lines not yet seen in the reference count against the look-ahead limit.
+ if(!seenReference->get(UnicodeString(myLine->name, myLine->len))) {
+ tailoringMove++;
+ }
+ }
+ }
+ }
+ }
+ if(refLine == NULL) { // ran out of reference
+ // this is the tail of tailoring - the last insertion
+ myLine = NULL;
+ found = TRUE;
+ } else if(tailoringMove == lookForward || myLine == NULL) { // run over threshold or out of tailoring
+ tailoringMove = 0;
+ // we didn't find insertion after all
+ // we will try substitution next
+ // reset the tailoring pointer
+ myLine = myLatestEqual->next;
+ // move the reference
+ refLine = refLine->next;
+ if(refLine) {
+ transferCumulativeStrength(refLine->previous, refLine);
+ }
+ } else { // we found an insertion
+ tailoringMove = 0;
+ if(myLine->strength != refLine->strength) {
+ while(myLine && refLine && *myLine == *refLine
+ && (myLine->strength != refLine->strength
+ || myLine->strength == UCOL_IDENTICAL)) {
+ myLine = myLine->next;
+ refLine = refLine->next;
+ }
+ // NOTE(review): the loop above can end with myLine or refLine being NULL, in which case
+ // the dereference below is invalid - confirm that the data rules this out.
+ if(*myLine != *refLine) {
+ continue;
+ }
+ }
+ if(myLine && refLine && myLine->previous->strength < myLine->strength) {
+ myLine = myLine->next;
+ refLine = refLine->next;
+ // NOTE(review): myLine and refLine were just advanced and are not checked for NULL here.
+ if(*myLine != *refLine) {
+ continue;
+ }
+ }
+ found = TRUE;
+ }
+ if(found) {
+ if(myLatestEqual->next != myLine || refLine == NULL) {
+ Line *myStart = NULL;
+ // this is a reset and a sequence
+ // myLatestEqual points at the last point that was the same
+ // This point will be a reset
+ if(myLine && refLine) { // if there is anything more to do - it might be worth saving it
+ myStart = myLatestEqual;
+ while(myStart != myLine) {
+ seenThis->put(UnicodeString(myStart->name, myStart->len), myStart, status);
+ myStart = myStart->next;
+ }
+ }
+ // Try to weed out stuff that is not affected, like:
+ // Tailoring:
+ // <<<S<<\u017F<\u0161<<<\u0160<t
+ // UCA:
+ // <<<S<<\u0161<<<\u0160<<\u017F<t
+ // Result:
+ // &S<<\u017F<\u0161<<<\u0160
+ // we have a sequence that spans from myLatestEqual to myLine (that one could be NULL,
+ // so we have to go down from myLatestEqual.
+ // Basically, for every element, we want to see the strongest cumulative difference
+ // from the reset point. If the cumulative difference is the same in both the reference and
+ // tailoring, that element could be removed.
+ calculateCumulativeStrengths(myLatestEqual, myLine);
+ calculateCumulativeStrengths(refLatestEqual, refLine);
+ myStart = myLatestEqual;
+ int32_t removed = 0;
+ int32_t traversed = 0;
+ // Compare every tailoring line in the run against every reference line in the run.
+ while(myStart && myStart != myLine) {
+ Line *refStart = refLatestEqual;
+ while(refStart && refStart != refLine) {
+ if(*myStart == *refStart) {
+ if(myStart->cumulativeStrength == refStart->cumulativeStrength) {
+ myStart->isRemoved = TRUE;
+ removed++;
+ }
+ }
+ refStart = refStart->next;
+ }
+ myStart = myStart->next;
+ traversed++;
+ }
+ // Keep the reset only when at least one line of the run survives.
+ if(removed < traversed) {
+ myLatestEqual->isReset = TRUE;
+ myLatestEqual->isRemoved = FALSE;
+ }
+
+ myLatestEqual = myLine;
+ }
+ }
+ }
+ }
+
+ // The case swap done on entry is not undone here; the call below is commented out.
+ if(upperFirst) {
+ //swapCase();
+ }
+
+ delete seenThis;
+ delete seenReference;
+
+}
+
+// Carries the cumulative strength forward from `previous` to `that`: the result is the
+// numerically smaller of that->strength and previous->cumulativeStrength.
+void
+SortedLines::transferCumulativeStrength(Line *previous, Line *that) {
+    that->cumulativeStrength = (that->strength > previous->cumulativeStrength)
+        ? previous->cumulativeStrength
+        : that->strength;
+}
+
+// Assigns cumulativeStrength to every line strictly between `start` and `end`, following
+// next-links. Each line receives the numerically smallest strength seen from the first
+// line after `start` up to and including itself.
+//
+// start - the reset point; it is skipped and must not be NULL
+// end   - exclusive stop; may be NULL, in which case the walk runs to the end of the list
+void
+SortedLines::calculateCumulativeStrengths(Line *start, Line *end) {
+    UColAttributeValue runningMinimum = UCOL_OFF;
+    for(Line *node = start->next; node != NULL && node != end; node = node->next) {
+        if(node->strength < runningMinimum) {
+            runningMinimum = node->strength;
+        }
+        node->cumulativeStrength = runningMinimum;
+    }
+}
+
+
+// Replaces the contents of `fillIn` with a copy of the repertoire set.
+void
+SortedLines::getRepertoire(UnicodeSet &fillIn) {
+    fillIn.clear().addAll(repertoire);
+}
+
+
+// Removes from the repertoire every entry whose NFC form differs from the entry itself
+// while that NFC form is also in the repertoire.
+//
+// Entries that do not fit the local buffers, or whose normalization fails or does not fit,
+// are left in the repertoire untouched. Each entry is normalized with a fresh error code,
+// so one failing entry no longer turns every later normalization into a no-op.
+void
+SortedLines::removeDecompositionsFromRepertoire() {
+    const int32_t kBufferCapacity = 256;
+    UnicodeSetIterator repertoireIter(repertoire);
+    UChar string[kBufferCapacity];
+    UChar composed[kBufferCapacity];
+    int32_t len = 0, compLen = 0;
+    UnicodeString compString;
+    UnicodeSet toRemove;
+
+    while(repertoireIter.next()) {
+        len = 0;
+        if(repertoireIter.isString()) { // process a string
+            const UnicodeString &entry = repertoireIter.getString();
+            len = entry.length();
+            if(len >= kBufferCapacity) {
+                // No room for the text plus its terminator.
+                continue;
+            }
+            u_memcpy(string, entry.getBuffer(), len);
+        } else { // process code point
+            UBool isError = FALSE;
+            U16_APPEND(string, len, kBufferCapacity - 1, repertoireIter.getCodepoint(), isError);
+            if(isError) {
+                continue;
+            }
+        }
+        string[len] = 0; // zero terminate, u_strcmp below needs it
+
+        UErrorCode status = U_ZERO_ERROR;
+        compLen = unorm_normalize(string, len, UNORM_NFC, 0, composed, kBufferCapacity, &status);
+        if(U_FAILURE(status) || compLen >= kBufferCapacity) {
+            // Normalization failed, or the result filled the buffer and is not terminated.
+            continue;
+        }
+        if(compLen != len || u_strcmp(string, composed) != 0) {
+            compString.setTo(composed, compLen);
+            if(repertoire.contains(compString)) {
+                toRemove.add(UnicodeString(string, len));
+            }
+        }
+    }
+    debug->log("\nRemoving\n");
+    debug->log(toRemove.toPattern(compString, TRUE), TRUE);
+    repertoire.removeAll(toRemove);
+}
+
+
+// Calls Line::swapCase on every entry of the sorting array, in array order.
+void
+SortedLines::swapCase()
+{
+    int32_t index = 0;
+    while(index < size) {
+        toSort[index]->swapCase();
+        index++;
+    }
+}
+
+// Computes the sort key of `line` into the shared key buffer and points line.sortKey at it.
+// The buffer is allocated on first use; keys are packed back to back and sortkeyOffset
+// advances by the value probe.getSortKey returns.
+//
+// NOTE(review): the lazily allocated buffer holds size*1024 bytes while the capacity passed
+// to getSortKey is computed from size*256 - confirm which figure is intended. Nothing here
+// stops the remaining capacity from becoming zero or negative once the buffer is used up.
+void
+SortedLines::calculateSortKey(Line &line)
+{
+    if(sortkeys == NULL) {
+        sortkeys = new uint8_t[size*1024];
+        memset(sortkeys, 0, size*1024);
+    }
+    uint8_t *const keySlot = sortkeys + sortkeyOffset;
+    const int32_t remainingCapacity = size*256 - sortkeyOffset;
+    line.sortKey = keySlot;
+    sortkeyOffset += probe.getSortKey(line, keySlot, remainingCapacity);
+}
+
+
+// Discards any existing key buffer, allocates a zeroed one of size*256 bytes and computes
+// the sort key of every entry of the sorting array into it, in array order.
+void
+SortedLines::calculateSortKeys()
+{
+    // delete[] on a NULL pointer is a no-op, so no guard is needed.
+    delete[] sortkeys;
+    sortkeyOffset = 0;
+
+    const int32_t bufferBytes = size*256;
+    sortkeys = new uint8_t[bufferBytes];
+    memset(sortkeys, 0, bufferBytes);
+
+    for(int32_t index = 0; index < size; index++) {
+        calculateSortKey(*toSort[index]);
+    }
+}
diff --git a/colprobe/sortedlines.h b/colprobe/sortedlines.h
new file mode 100755
index 0000000..5f37006
--- /dev/null
+++ b/colprobe/sortedlines.h
@@ -0,0 +1,120 @@
+#ifndef COLPROBE_SORTEDLINES_H
+#define COLPROBE_SORTEDLINES_H
+
+// colprobe includes
+#include "colprobe.h"
+#include "line.h"
+#include "uprinter.h"
+#include "strengthprobe.h"
+
+
+// ICU includes
+#include "unicode/uniset.h"
+#include "unicode/usetiter.h"
+#include "unicode/uscript.h"
+#include "hash.h"
+
+// A collection of Line objects together with the repertoire (UnicodeSet) they
+// relate to, a StrengthProbe, and a shared buffer for the lines' sort keys.
+// NOTE(review): the class holds several raw pointers and declares no copy
+// constructor or assignment operator; ownership is not visible from this
+// header - confirm before copying instances.
+class SortedLines {
+ Line empty;
+ Line *UB[UCOL_OFF];
+ UnicodeSet ignorables[UCOL_OFF];
+
+ // Array of pointers to lines; the first `size` entries are iterated by
+ // swapCase() and calculateSortKeys().
+ Line **toSort;
+ int32_t toSortCapacity;
+ Line *lines;
+ int32_t size;
+ int32_t capacity;
+
+ // Set handed out by getRepertoire() and pruned by
+ // removeDecompositionsFromRepertoire().
+ UnicodeSet repertoire;
+ UnicodeSet excludeBounds;
+
+ StrengthProbe probe;
+
+ Line *first;
+ Line *last;
+ Line *current;
+ // Private default constructor: initializes nothing.
+ SortedLines() {};
+
+ UPrinter *logger;
+ UPrinter *debug;
+
+ Hashtable *contractionsTable;
+ Hashtable *duplicators; // elements that duplicate preceding characters
+ int32_t maxExpansionPrefixSize;
+
+ // Properties of the sort
+ UBool wordSort;
+ UBool frenchSecondary;
+ UBool upperFirst;
+
+ // Shared sort key storage: allocated by calculateSortKey()/calculateSortKeys();
+ // sortkeyOffset is the position at which the next key is written.
+ uint8_t *sortkeys;
+ int32_t sortkeyOffset;
+public:
+ SortedLines(const UnicodeSet &set, const UnicodeSet &excludeBounds, const StrengthProbe &probe, UPrinter *logger, UPrinter *debug);
+ SortedLines(FILE *file, UPrinter *logger, UPrinter *debug, UErrorCode &status);
+ ~SortedLines();
+ void analyse(UErrorCode &status);
+
+ void sort(UBool setStrengths = TRUE, UBool link = FALSE);
+ void sort(Line **sortingArray, int32_t sizeToSort, UBool setStrengths = TRUE, UBool link = FALSE);
+
+ Line *getFirst();
+ Line *getLast();
+ void add(Line *line, UBool linkIn = FALSE);
+ void insert(Line *line, int32_t index);
+ Line *getNext();
+ Line *getPrevious();
+ Line *operator[](int32_t index);
+ int32_t addContractionsToRepertoire(UErrorCode &status);
+
+ int32_t getSize() const;
+
+ int32_t detectExpansions();
+
+ UnicodeString toString(UBool useLinks = FALSE);
+ UnicodeString toStringFromEmpty();
+ UnicodeString toPrettyString(UBool useLinks, UBool printSortKeys = FALSE);
+ UnicodeString toOutput(const char *format,
+ const char *locale, const char *platform, const char *reference,
+ UBool useLinks, UBool initialize, UBool moreToCome);
+ UnicodeString toBundle(const char *locale, const char *platform, const char *reference,
+ UBool useLinks, UBool initialize, UBool moreToCome);
+ UnicodeString toHTML(const char *locale, const char *platform, const char *reference,
+ UBool useLinks, UBool initialize, UBool moreToCome);
+ UnicodeString toXML(const char *locale, const char *platform, const char *reference,
+ UBool useLinks, UBool initialize, UBool moreToCome);
+ UnicodeString arrayToString(Line** sortedLines, int32_t linesSize, UBool pretty, UBool useLinks, UBool printSortKeys);
+ void setSortingArray(Line **sortingArray, Line *elements, int32_t sizeToSort);
+ int32_t setSortingArray(Line **sortingArray, Hashtable *table);
+
+ void reduceDifference(SortedLines& reference);
+ // Overwrites fillIn with the contents of the repertoire.
+ void getRepertoire(UnicodeSet &fillIn);
+ // Drops repertoire elements whose NFC form differs from them and is itself
+ // in the repertoire.
+ void removeDecompositionsFromRepertoire();
+ void getBounds(UErrorCode &status);
+ void classifyRepertoire();
+ void toFile(FILE *file, UBool useLinks, UErrorCode &status);
+ // Calls swapCase() on every line in the sorting array.
+ void swapCase();
+ // Reallocates the sort key buffer and recomputes the key of every line.
+ void calculateSortKeys();
+ // Appends the sort key of one line to the sort key buffer.
+ void calculateSortKey(Line &line);
+private:
+ void init();
+ void init(UnicodeSet &rep, Line *lin);
+ int32_t detectContractions(Line **firstRep, int32_t firstSize,
+ Line **secondRep, int32_t secondSize,
+ Line *toAddTo, int32_t &toAddToSize,
+ Line *lesserToAddTo, int32_t &lesserToAddToSize,
+ int32_t capacity, UErrorCode &status);
+
+ void calculateCumulativeStrengths(Line *start, Line *end);
+ void transferCumulativeStrength(Line *previous, Line *that);
+ void updateBounds(UnicodeSet &set);
+ void addAll(Line* toAdd, int32_t toAddSize);
+ void setDistancesFromEmpty(Line* array, int32_t arraySize);
+ void noteContraction(const char* msg, Line *toAddTo, int32_t &toAddToSize, Line *left, Line *right, int32_t &noConts, UErrorCode &status);
+ int32_t gooseUp(int32_t resetIndex, int32_t expansionIndex, Line &expLine, int32_t *expIndexes, int32_t &expIndexSize, UColAttributeValue strength);
+ UBool getExpansionLine(const Line &expansion, const Line &previous, const Line &exp, Line &expansionLine);
+
+
+};
+
+#endif // #ifndef COLPROBE_SORTEDLINES_H
diff --git a/colprobe/strengthprobe.cpp b/colprobe/strengthprobe.cpp
new file mode 100755
index 0000000..afa94b6
--- /dev/null
+++ b/colprobe/strengthprobe.cpp
@@ -0,0 +1,402 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File strengthprobe.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 07/07/2003 weiv Creation.
+*******************************************************************************
+*/
+
+//
+// class StrengthProbe
+//
+// Determines the collation strength of the difference between two lines by
+// comparing them with probe characters (B0..B3) and a separator (SE) attached.
+//
+
+#include "strengthprobe.h"
+
+// Builds a probe around the given comparison and sort key callbacks, using SE
+// as the separator and B0..B3 as the probe characters. The scratch pointers
+// are aimed at this object's own scratch lines; French secondary starts off.
+StrengthProbe::StrengthProbe(CompareFn comparer, GetSortKeyFn getter, UChar SE,
+                             UChar B0, UChar B1, UChar B2, UChar B3)
+  : SE(SE),
+    B0(B0),
+    B1(B1),
+    B2(B2),
+    B3(B3),
+    utilFirstP(&utilFirst),
+    utilSecondP(&utilSecond),
+    frenchSecondary(FALSE),
+    comparer(comparer),
+    skgetter(getter)
+{
+}
+
+// Installs a new set of probe characters and returns the result of
+// checkSanity() for them (0 when they pass every check).
+int
+StrengthProbe::setProbeChars(UChar B0, UChar B1, UChar B2, UChar B3)
+{
+  // The parameters shadow the members, hence the explicit this->.
+  this->B0 = B0;
+  this->B1 = B1;
+  this->B2 = B2;
+  this->B3 = B3;
+
+  const int sanity = checkSanity();
+  return sanity;
+}
+
+// Verifies that the probe characters are usable with the current comparer.
+//
+// First, six pairs are compared; each first element must sort strictly before
+// the second (B0<B3, B0<B2, B0<B1, B3<B2, B3<B1, B2<B1). A failing pair
+// returns comparerResult*10 + a code (1..6) identifying the pair.
+// Then each of B0..B3 must not be further than UCOL_PRIMARY from the empty
+// string; a failure returns 1000 + the index of the offending character.
+// Returns 0 when every check passes.
+int
+StrengthProbe::checkSanity()
+{
+  const UChar lesser[]  = { B0, B0, B0, B3, B3, B2 };
+  const UChar greater[] = { B3, B2, B1, B2, B1, B1 };
+  const int pairCode[]  = {  3,  2,  1,  5,  4,  6 };
+
+  for(int pair = 0; pair < 6; pair++) {
+    utilFirst.setTo(lesser[pair]);
+    utilSecond.setTo(greater[pair]);
+    const int sanityRes = comparer(&utilFirstP, &utilSecondP);
+    if(sanityRes >= 0) {
+      return sanityRes*10 + pairCode[pair];
+    }
+  }
+
+  const UChar probes[] = { B0, B1, B2, B3 };
+  for(int which = 0; which < 4; which++) {
+    utilFirst.setTo(probes[which]);
+    if(distanceFromEmptyString(utilFirst) > UCOL_PRIMARY) {
+      return 1000 + which;
+    }
+  }
+  return 0;
+}
+
+// Compares (first + SE + x) with (second + SE + y) using the scratch lines.
+// Returns TRUE when the first string compares strictly less than the second.
+UBool
+StrengthProbe::probePrefix(const Line &x, const Line &y, UChar first, UChar second) {
+  UChar *cursor = utilFirst.name;
+  *cursor++ = first;
+  *cursor++ = SE;
+  u_strcpy(cursor, x.name);
+  cursor[x.len] = 0;
+  utilFirst.len = x.len+2;
+
+  cursor = utilSecond.name;
+  *cursor++ = second;
+  *cursor++ = SE;
+  u_strcpy(cursor, y.name);
+  cursor[y.len] = 0;
+  utilSecond.len = y.len+2;
+
+  return (UBool)(comparer(&utilFirstP, &utilSecondP) < 0);
+}
+
+// Compares (x + SE + first) with (y + SE + second) using the scratch lines.
+// Returns TRUE when the first string compares strictly less than the second.
+UBool
+StrengthProbe::probeSuffix(const Line &x, const Line &y, UChar first, UChar second) {
+  u_strcpy(utilFirst.name, x.name);
+  UChar *tail = utilFirst.name + x.len;
+  tail[0] = SE;
+  tail[1] = first;
+  tail[2] = 0;
+  utilFirst.len = x.len + 2;
+
+  u_strcpy(utilSecond.name, y.name);
+  tail = utilSecond.name + y.len;
+  tail[0] = SE;
+  tail[1] = second;
+  tail[2] = 0;
+  utilSecond.len = y.len + 2;
+
+  return (UBool)(comparer(&utilFirstP, &utilSecondP) < 0);
+}
+
+// Compares (first + x) with (second + y), without a separator, using the
+// scratch lines. Returns TRUE when the first string compares strictly less
+// than the second.
+UBool
+StrengthProbe::probePrefixNoSep(const Line &x, const Line &y, UChar first, UChar second) {
+  UChar *cursor = utilFirst.name;
+  *cursor++ = first;
+  u_strcpy(cursor, x.name);
+  cursor[x.len] = 0;
+  utilFirst.len = x.len + 1;
+
+  cursor = utilSecond.name;
+  *cursor++ = second;
+  u_strcpy(cursor, y.name);
+  cursor[y.len] = 0;
+  utilSecond.len = y.len + 1;
+
+  return (UBool)(comparer(&utilFirstP, &utilSecondP) < 0);
+}
+
+// Compares (x + first) with (y + second), without a separator, using the
+// scratch lines. Returns TRUE when the first string compares strictly less
+// than the second.
+UBool
+StrengthProbe::probeSuffixNoSep(const Line &x, const Line &y, UChar first, UChar second) {
+  u_strcpy(utilFirst.name, x.name);
+  UChar *tail = utilFirst.name + x.len;
+  tail[0] = first;
+  tail[1] = 0;
+  utilFirst.len = x.len + 1;
+
+  u_strcpy(utilSecond.name, y.name);
+  tail = utilSecond.name + y.len;
+  tail[0] = second;
+  tail[1] = 0;
+  utilSecond.len = y.len + 1;
+
+  return (UBool)(comparer(&utilFirstP, &utilSecondP) < 0);
+}
+
+// Returns the collation strength of the difference between x and y, where x
+// is expected to sort before (or equal to) y.
+//
+//  - If x compares equal to the empty line, the result is the distance of y
+//    from the empty string.
+//  - UCOL_IDENTICAL when x and y compare equal.
+//  - UCOL_OFF when x sorts after y, or when no probe gives an answer.
+//  - Otherwise UCOL_PRIMARY .. UCOL_QUATERNARY, found by attaching probe
+//    characters before/after the two lines and re-comparing.
+UColAttributeValue
+StrengthProbe::getStrength(const Line &x, const Line &y) {
+  const Line *xp = &x;
+  const Line *yp = &y;
+
+  Line empty;
+  Line *emptyP = &empty;
+  if(comparer(&emptyP, &xp) == 0) {
+    return distanceFromEmptyString(y);
+  }
+
+  int32_t result = comparer(&xp, &yp);
+
+  if(result == 0) {
+    return UCOL_IDENTICAL;
+  } else if(result > 0) {
+    return UCOL_OFF; // bad situation
+  } else { // we need to probe strength
+    if(probeSuffix(x, y, B1, B0)) {
+    //if(probePrefix(x, y, B2, B0)) { // swamps secondary difference
+      return UCOL_PRIMARY;
+    } else if(probePrefix(x, y, B3, B0)) { // swamps tertiary difference
+      return UCOL_SECONDARY;
+    } else if(probeSuffix(x, y, B3, B0)) { // swamped by tertiary difference
+      return UCOL_TERTIARY;
+    } else if(!probePrefix(x, y, B3, B0)) {
+      return UCOL_QUATERNARY;
+    }
+    /*
+    //if(probeSuffix(x, y, B1, B0)) {
+    if(probePrefix(x, y, B2, B0)) { // swamps secondary difference
+      return UCOL_PRIMARY;
+    } else if(probePrefix(x, y, B3, B0)) { // swamps tertiary difference
+      return UCOL_SECONDARY;
+    } else if(probeSuffix(x, y, B3, B0)) { // swamped by tertiary difference
+      return UCOL_TERTIARY;
+    } else if(!probePrefix(x, y, B3, B0)) {
+      return UCOL_QUATERNARY;
+    }
+    */
+  }
+  return UCOL_OFF; // bad
+}
+
+// Convenience overload: wraps both strings in Line objects and returns the
+// strength computed by the Line-based overload.
+UColAttributeValue
+StrengthProbe::getStrength(const UnicodeString &sx, const UnicodeString &sy) {
+  const Line lineX(sx);
+  const Line lineY(sy);
+  return getStrength(lineX, lineY);
+}
+
+// Wraps both strings in Line objects and returns the comparer's result for
+// them.
+int32_t
+StrengthProbe::compare(const UnicodeString &sx, const UnicodeString &sy) {
+  Line left(sx);
+  Line right(sy);
+  // The comparer takes the addresses of pointers to the lines.
+  const Line *leftP = &left;
+  const Line *rightP = &right;
+  return comparer(&leftP, &rightP);
+}
+
+// Returns the comparer's result for the two lines.
+int32_t
+StrengthProbe::compare(const Line &x, const Line &y) {
+  // The comparer takes the addresses of pointers to the lines.
+  const Line *leftP = &x;
+  const Line *rightP = &y;
+  return comparer(&leftP, &rightP);
+}
+
+// Returns the strength at which x differs from the empty string.
+//
+//  - UCOL_IDENTICAL when x compares equal to the empty line.
+//  - UCOL_OFF when x sorts before the empty line.
+//  - UCOL_PRIMARY when x sorts at or after B0.
+//  - Otherwise x sorts between the empty line and B0, and the result is found
+//    by comparing SE+x+B0 against SE+B0, SE+B2 and SE+B3.
+//
+// The sort keys computed along the way are never read by this function; they
+// are presumably kept for inspection in a debugger.
+UColAttributeValue
+StrengthProbe::distanceFromEmptyString(const Line &x) {
+  Line empty;
+  Line *emptyP = &empty;
+  uint8_t buff[256];
+  getSortKey(empty.name, empty.len, buff, 256);
+  Line B0Line(B0);
+  Line *B0LineP = &B0Line;
+  const Line *xp = &x;
+  int32_t result = comparer(&emptyP, &xp);
+  if(result == 0) {
+    return UCOL_IDENTICAL;
+  } else if(result > 0) {
+    return UCOL_OFF;
+  }
+  result = comparer(&B0LineP, &xp);
+  if(result <= 0) {
+    return UCOL_PRIMARY;
+  }
+  Line sexb0(SE);
+  sexb0.append(x.name, x.len);
+  sexb0.append(B0);
+
+  Line seb0(SE);
+  seb0.append(B0);
+  uint8_t seb0K[256];
+  uint8_t sexb0K[256];
+  uint8_t seb2K[256];
+  uint8_t seb3K[256];
+  memset(seb0K, 0, 256);
+  memset(sexb0K, 0, 256);
+  memset(seb2K, 0, 256);
+  memset(seb3K, 0, 256);
+
+  getSortKey(seb0, seb0K, 256);
+  getSortKey(sexb0, sexb0K, 256);
+
+  if(compare(seb0, sexb0) <= 0) {
+    Line seb2(SE);
+    seb2.append(B2);
+    getSortKey(seb2, seb2K, 256);
+    result = compare(seb2, sexb0);
+    if((result <= 0 && !frenchSecondary) || (result >= 0 && frenchSecondary)) { // swamps tertiary difference
+      return UCOL_SECONDARY;
+    }
+    Line seb3(SE);
+    seb3.append(B3);
+    getSortKey(seb3, seb3K, 256);
+    if(compare(seb3, sexb0) < 0) {
+      return UCOL_TERTIARY;
+    }
+    return UCOL_QUATERNARY;
+  } else {
+    // if this was UCA, we would have a primary difference.
+    // however, this might not be so, since not everybody
+    // makes well formed CEs.
+    // in cs_CZ on linux, space is tertiary ignorable, but
+    // its quaternary level strength is lower than quad
+    // strengths for non-ignorables. oh well, more testing
+    // required
+    // I think that we can only have quaternary difference
+    // here (in addition to primary difference).
+    //if(!probePrefix(x, empty, B3, B0)) {
+    //return UCOL_QUATERNARY;
+    //} else {
+    return UCOL_PRIMARY;
+    //}
+  }
+}
+
+// Convenience overload: wraps the string in a Line and returns its distance
+// from the empty string.
+UColAttributeValue
+StrengthProbe::distanceFromEmptyString(const UnicodeString &x) {
+  const Line wrapped(x);
+  const UColAttributeValue distance = distanceFromEmptyString(wrapped);
+  return distance;
+}
+
+
+// Returns the strength of the difference between (prefix + x) and
+// (prefix + y). The concatenations are built in the contraction scratch lines.
+UColAttributeValue
+StrengthProbe::getPrefixedStrength(const Line &prefix, const Line &x, const Line &y) {
+ contractionUtilFirst.setToConcat(&prefix, &x);
+ contractionUtilSecond.setToConcat(&prefix, &y);
+ return getStrength(contractionUtilFirst, contractionUtilSecond);
+}
+
+
+// Copy constructor: delegates to operator=, which copies the probe settings
+// and points the scratch pointers at this object's own scratch lines.
+StrengthProbe::StrengthProbe(const StrengthProbe &that) {
+ *this = that;
+}
+
+// Copies the probe characters, separator, French-secondary flag and the two
+// callbacks from `that`. The scratch lines are not copied; the scratch
+// pointers are re-aimed at this object's own scratch lines.
+// Self-assignment is a no-op.
+StrengthProbe &
+StrengthProbe::operator=(const StrengthProbe &that) {
+  if(this == &that) {
+    return *this;
+  }
+
+  B0 = that.B0;
+  B1 = that.B1;
+  B2 = that.B2;
+  B3 = that.B3;
+  SE = that.SE;
+  frenchSecondary = that.frenchSecondary;
+  comparer = that.comparer;
+  skgetter = that.skgetter;
+
+  utilFirstP = &utilFirst;
+  utilSecondP = &utilSecond;
+
+  return *this;
+}
+
+// Compares B0+SE+B2 with B2+SE+B0. Returns FALSE when the first sorts lower;
+// returns TRUE (and records it in frenchSecondary) when it sorts higher.
+// When the two compare equal, status is set to U_INTERNAL_PROGRAM_ERROR and
+// FALSE is returned.
+UBool
+StrengthProbe::isFrenchSecondary(UErrorCode &status) {
+  utilFirst.setTo(B0);
+  utilFirst.append(SE);
+  utilFirst.append(B2);
+  utilSecond.setTo(B2);
+  utilSecond.append(SE);
+  utilSecond.append(B0);
+
+  const int32_t order = compare(utilFirst, utilSecond);
+
+  if(order > 0) {
+    frenchSecondary = TRUE;
+    return TRUE;
+  }
+  if(order == 0) {
+    status = U_INTERNAL_PROGRAM_ERROR;
+  }
+  return FALSE;
+}
+
+// Compares every code unit in 0x41..0x5A with the code unit 0x20 above it.
+// Returns TRUE when the lower value sorts first in all 26 pairs and FALSE when
+// it sorts last in all of them. Any mixed or equal outcome sets status to
+// U_INTERNAL_PROGRAM_ERROR and returns FALSE.
+UBool
+StrengthProbe::isUpperFirst(UErrorCode &status) {
+  int32_t upper = 0, lower = 0, equal = 0;
+
+  UChar i = 0x41;
+  while(i < 0x5B) {
+    utilFirst.setTo(i);
+    utilSecond.setTo(i+0x20);
+    const int32_t result = compare(utilFirst, utilSecond);
+    if(result == 0) {
+      equal++;
+    } else if(result < 0) {
+      upper++;
+    } else {
+      lower++;
+    }
+    i++;
+  }
+
+  if(equal == 0) {
+    if(lower == 0) {
+      return TRUE;
+    }
+    if(upper == 0) {
+      return FALSE;
+    }
+  }
+  status = U_INTERNAL_PROGRAM_ERROR;
+  return FALSE;
+}
+
diff --git a/colprobe/strengthprobe.h b/colprobe/strengthprobe.h
new file mode 100755
index 0000000..ec925b2
--- /dev/null
+++ b/colprobe/strengthprobe.h
@@ -0,0 +1,85 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File strengthprobe.h
+*
+* Modification History:
+*
+* Date Name Description
+* 07/07/2003 weiv Creation.
+*******************************************************************************
+*/
+
+//
+// class StrengthProbe
+//
+// Determines the collation strength of the difference between two lines by
+// comparing them with probe characters (B0..B3) and a separator (SE) attached.
+//
+
+#ifndef COLPROBE_STRENGTHPROBE_H
+#define COLPROBE_STRENGTHPROBE_H
+
+#include "colprobe.h"
+#include "line.h"
+#include "unicode/uniset.h"
+#include "unicode/usetiter.h"
+
+// Probes a collation implementation, reached only through a comparison
+// callback and a sort key callback, to find the strength at which two lines
+// differ.
+class StrengthProbe {
+public:
+  UChar SE; // separator placed between a line and a probe character
+  UChar B0; // probe characters; checkSanity() requires B0 < B3 < B2 < B1
+  UChar B1;
+  UChar B2;
+  UChar B3;
+private:
+  // Scratch lines used to build the strings handed to the comparer, and
+  // pointers to them (the comparer takes the address of a Line pointer).
+  Line utilFirst;
+  Line utilSecond;
+  Line *utilFirstP;
+  Line *utilSecondP;
+  // Scratch lines used by getPrefixedStrength().
+  Line contractionUtilFirst;
+  Line contractionUtilSecond;
+  UBool probePrefix(const Line &x, const Line &y, UChar first, UChar second);
+  UBool probeSuffix(const Line &x, const Line &y, UChar first, UChar second);
+  UBool probePrefixNoSep(const Line &x, const Line &y, UChar first, UChar second);
+  UBool probeSuffixNoSep(const Line &x, const Line &y, UChar first, UChar second);
+
+  // Set to TRUE by isFrenchSecondary() when it detects French secondary order.
+  UBool frenchSecondary;
+
+public:
+  CompareFn comparer;
+  GetSortKeyFn skgetter;
+
+  // Default constructor. Uses the same separator and probe characters as the
+  // defaults of the main constructor and leaves both callbacks NULL, so the
+  // object must be assigned from a fully constructed probe before use.
+  // Every member is initialized so that no pointer is left dangling.
+  StrengthProbe() :
+    SE(0x0030),
+    B0(0x0061), B1(0x0062), B2(0x00E1), B3(0x0041),
+    utilFirstP(&utilFirst), utilSecondP(&utilSecond),
+    frenchSecondary(FALSE),
+    comparer(NULL), skgetter(NULL)
+  {}
+  StrengthProbe(CompareFn comparer, GetSortKeyFn getter, UChar SE = 0x0030, UChar B0 = 0x0061, UChar B1 = 0x0062, UChar B2 = 0x00E1, UChar B3 = 0x0041); //, UChar LB = 0x0039, UChar UB = 0xfa29);
+  // Replaces the probe characters; returns the checkSanity() result.
+  int setProbeChars(UChar B0, UChar B1, UChar B2, UChar B3);
+  // Returns 0 when the probe characters are usable, a diagnostic code otherwise.
+  int checkSanity();
+  StrengthProbe(const StrengthProbe &that);
+  StrengthProbe &operator=(const StrengthProbe &that);
+  UColAttributeValue getStrength(const Line &x, const Line &y);
+  UColAttributeValue getStrength(const UnicodeString &x, const UnicodeString &y);
+  UColAttributeValue getPrefixedStrength(const Line &prefix, const Line &x, const Line &y);
+  int32_t compare(const UnicodeString &x, const UnicodeString &y);
+  int32_t compare(const Line &x, const Line &y);
+  UColAttributeValue distanceFromEmptyString(const Line &x);
+  UColAttributeValue distanceFromEmptyString(const UnicodeString &x);
+  UBool isFrenchSecondary(UErrorCode &status);
+  UBool isUpperFirst(UErrorCode &status);
+  // Forwards the line's text to the sort key callback; returns its result.
+  int getSortKey(const Line &l, uint8_t *buffer, int32_t buffCap) {
+    return skgetter(l.name, l.len, buffer, buffCap);
+  }
+
+  // Forwards a raw string to the sort key callback; returns its result.
+  int getSortKey(UChar *string, int32_t sLen, uint8_t *buffer, int32_t buffCap) {
+    return skgetter(string, sLen, buffer, buffCap);
+  }
+
+};
+
+
+#endif //#ifndef COLPROBE_STRENGTHPROBE_H
+
diff --git a/colprobe/tableStarter.pl b/colprobe/tableStarter.pl
new file mode 100755
index 0000000..d1b7fbb
--- /dev/null
+++ b/colprobe/tableStarter.pl
@@ -0,0 +1,16 @@
+#!/usr/bin/perl -w
+
+# Runs doComparisonTable.pl once for every locale listed in locale.txt whose
+# name contains an underscore and does not start with '#'.
+
+use strict;
+
+my @locales = split(/\n/, `cat /home/weiv/src/icu/source/extra/colprobe/locale.txt`);
+
+for my $locale (@locales) {
+    next unless $locale =~ /_/;   # only locales with a region/variant part
+    next if $locale =~ /^#/;      # skip commented-out entries
+
+    my $command = "/home/weiv/src/icu/source/extra/colprobe/doComparisonTable.pl $locale";
+    print "$command\n";
+    `$command`;
+}
diff --git a/colprobe/targetsetgenerator.cpp b/colprobe/targetsetgenerator.cpp
new file mode 100755
index 0000000..e3dcbe8
--- /dev/null
+++ b/colprobe/targetsetgenerator.cpp
@@ -0,0 +1,8 @@
+#include "targetsetgenerator.h"
+
+// Stores the comparer, keeps a copy of startingSet in the `set` member, and
+// also adds every element of startingSet to this object itself (the class
+// derives from UnicodeSet).
+TargetSetGenerator::TargetSetGenerator(UnicodeSet &startingSet, CompareFn comparer) :
+ comparer(comparer),
+ set(startingSet)
+{
+ addAll(startingSet);
+}
diff --git a/colprobe/targetsetgenerator.h b/colprobe/targetsetgenerator.h
new file mode 100755
index 0000000..cd89734
--- /dev/null
+++ b/colprobe/targetsetgenerator.h
@@ -0,0 +1,15 @@
+#ifndef TARGETSETGENERATOR_H
+#define TARGETSETGENERATOR_H
+
+#include "colprobe.h"
+#include "unicode/uniset.h"
+
+// A UnicodeSet that is seeded from a starting set and remembers both that
+// starting set and a comparison callback.
+class TargetSetGenerator : public UnicodeSet {
+public:
+ TargetSetGenerator(UnicodeSet &startingSet, CompareFn comparer);
+private:
+ CompareFn comparer; // comparison callback supplied at construction
+ UnicodeSet set; // copy of the starting set
+};
+
+#endif
\ No newline at end of file
diff --git a/colprobe/template b/colprobe/template
new file mode 100755
index 0000000..1a1aa91
--- /dev/null
+++ b/colprobe/template
@@ -0,0 +1,48 @@
+<html>
+
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+<title>$locale</title>
+<style>
+ <!--
+ table { border-spacing: 0; border-collapse: collapse; width: 100%;
+ border: 1px solid black }
+td, th { width: 10%; border-spacing: 0; border-collapse: collapse; color: black;
+ vertical-align: top; border: 1px solid black }
+-->
+ </style>
+</head>
+
+<body bgcolor="#FFFFFF">
+
+<p><b><font color="#FF0000">Collation:</font> $locale <a href="http://oss.software.ibm.com/cgi-bin/icu/lx/en/?_=$locale">Demo</a>,
+
+<a href="http://oss.software.ibm.com/cvs/icu/~checkout~/locale/all_diff_xml/comparison_charts.html">Cover
+Page</a>, <a href="http://oss.software.ibm.com/cvs/icu/~checkout~/locale/all_diff_xml/index.html">Index</a></b></p>
+<table>
+ <tr>
+ <th bgcolor="#AD989D">COMMON (<a href="http://oss.software.ibm.com/cvs/icu/~checkout~/locale/common/xml/$locale.xml">xml</a>)</th>
+ <th bgcolor="#1191F1">LINUX (<a href="http://oss.software.ibm.com/cvs/icu/~checkout~/locale/linux/xml/$locale.xml">xml</a>)</th>
+ <th bgcolor="#98FB98">WINDOWS (<a href="http://oss.software.ibm.com/cvs/icu/~checkout~/locale/windows/xml/$locale.xml">xml</a>)</th>
+ </tr>
+
+ <tr>
+ <td bgcolor="#AD989D">1.0-alpha</td>
+ <td bgcolor="#FF6633">1.0</td>
+ <td bgcolor="#FF6633">=</td>
+ <td bgcolor="#FF6633"><span title="006E {LATIN SMALL LETTER N}">&amp;n</span><br>
+ <span title="006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y}"> &lt; ny</span><br>
+
+ <span title="006E 006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y} / 006E 0079 {LATIN SMALL LETTER N} {LATIN SMALL LETTER Y}"> = nny / ny</span><br>
+ <span title="006E 0059 {LATIN SMALL LETTER N} {LATIN CAPITAL LETTER Y}"> &lt;&lt;&lt; nY</span><br>
+ </td>
+ <td bgcolor="#FF6633">=</td>
+ <td bgcolor="#FFFF33">1.2</td>
+
+ <td bgcolor="#98FB98">Windows XP</td>
+ <td bgcolor="#FF6633">=</td>
+ <td bgcolor="#FF6633">=</td>
+ </tr>
+</table>
+
+</body>
diff --git a/colprobe/uniqueFiles.pl b/colprobe/uniqueFiles.pl
new file mode 100755
index 0000000..67da54e
--- /dev/null
+++ b/colprobe/uniqueFiles.pl
@@ -0,0 +1,49 @@
+#!/usr/bin/perl
+
+# Groups the files named on the command line into sets with identical
+# contents. For each group, prints "<file> <n>" for the representative file
+# and for every file identical to it, where n is the group number.
+
+use strict;
+# Core module; compare() returns 0 only when both files could be read and are
+# identical. The former backtick `diff` treated a failed diff (empty stdout)
+# as "identical" and broke on file names containing spaces.
+use File::Compare;
+
+my $file;
+my $secondfile;
+my %secondfilelist;
+my @same;
+my %list;
+my $samefile;
+
+foreach $secondfile (@ARGV) {
+    $secondfilelist{$secondfile} = "";
+}
+
+foreach $file (sort keys(%secondfilelist)) {
+    # Files already assigned to an earlier group have been removed from the hash.
+    if(exists $secondfilelist{$file}) {
+        delete $secondfilelist{$file};
+        foreach $secondfile (sort(keys %secondfilelist)) {
+            #print "diffing: $file and $secondfile\n";
+            if (compare($file, $secondfile) == 0) {
+                #print "$file and $secondfile are the same\n";
+                push @same, $secondfile;
+            }
+        }
+        print "Adding @same to $file\n";
+        $list{$file} = [@same] ;
+        foreach $samefile (@same) {
+            delete $secondfilelist{$samefile};
+        }
+        # Start the next group with an empty list.
+        @same = ();
+    }
+}
+
+
+my $i = 0;
+my $j = 0;
+foreach $file (sort( keys %list)) {
+    #print "$file -> "; #@{list{$file}}\n";
+    print "<$file> <$j>\n";
+    foreach $i ( 0 .. $#{ $list{$file} } ) {
+        #print "$list{$file}[$i] ";
+        print "<$list{$file}[$i]> <$j>\n ";
+    }
+    $j++;
+}
+
diff --git a/colprobe/uprinter.cpp b/colprobe/uprinter.cpp
new file mode 100755
index 0000000..fd9ab9c
--- /dev/null
+++ b/colprobe/uprinter.cpp
@@ -0,0 +1,116 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File uprinter.cpp
+*
+* Modification History:
+*
+* Date Name Description
+* 03/18/2003 weiv Creation.
+*******************************************************************************
+*/
+
+#include "uprinter.h"
+
+// Wraps an already open FILE in a UFILE for the given locale and encoding.
+// When transliterateNonPrintable is set, output outside CR, LF, TAB and
+// U+0020..U+007F is written through an Any-Hex/Java transliterator.
+//
+// The locale name is also kept for formatted logging. It is copied with a
+// bound (the member buffer is fixed-size) and a NULL locale is stored as the
+// empty string instead of being passed to strcpy.
+UPrinter::UPrinter(FILE *file, const char *locale, const char *encoding, UBool transliterateNonPrintable) {
+  _on = TRUE;
+  out = u_finit(file, locale, encoding);
+  if(locale != NULL) {
+    strncpy(_locale, locale, sizeof(_locale) - 1);
+    _locale[sizeof(_locale) - 1] = 0;
+  } else {
+    _locale[0] = 0;
+  }
+  // Only install the transliterator when the UFILE was actually created.
+  if(transliterateNonPrintable && out != NULL) {
+    UErrorCode status = U_ZERO_ERROR;
+    UTransliterator *anyHex = utrans_open("[^\\u000d\\u000a\\u0009\\u0020-\\u007f] Any-Hex/Java", UTRANS_FORWARD, NULL, 0, NULL, &status);
+    u_fsettransliterator(out, U_WRITE, anyHex, &status);
+  }
+}
+
+// Opens the named file for binary writing with the given locale and encoding
+// and emits a BOM (U+FEFF). When transliterateNonPrintable is set, output goes
+// through `trans`, or through an Any-Hex/Java transliterator for everything
+// outside CR, LF, TAB and U+0020..U+007F when `trans` is NULL.
+//
+// If the file cannot be opened, nothing is written and no transliterator is
+// installed. The locale name is copied with a bound, and a NULL locale is
+// stored as the empty string.
+UPrinter::UPrinter(const char *name, const char *locale, const char *encoding, UTransliterator *trans, UBool transliterateNonPrintable) {
+  _on = TRUE;
+  out = u_fopen(name, "wb", locale, encoding);
+  if(out != NULL) {
+    u_fputc(0xFEFF, out); // emit a BOM
+  }
+  if(locale != NULL) {
+    strncpy(_locale, locale, sizeof(_locale) - 1);
+    _locale[sizeof(_locale) - 1] = 0;
+  } else {
+    _locale[0] = 0;
+  }
+  if(transliterateNonPrintable && out != NULL) {
+    UErrorCode status = U_ZERO_ERROR;
+    if(trans == NULL) {
+      UTransliterator *anyHex = utrans_open("[^\\u000d\\u000a\\u0009\\u0020-\\u007f] Any-Hex/Java", UTRANS_FORWARD, NULL, 0, NULL, &status);
+      u_fsettransliterator(out, U_WRITE, anyHex, &status);
+    } else {
+      u_fsettransliterator(out, U_WRITE, trans, &status);
+    }
+  }
+}
+
+// Closes the UFILE this printer writes to.
+UPrinter::~UPrinter() {
+ u_fclose(out);
+}
+
+// Logs a UnicodeString, followed by a newline when nl is set.
+// Does nothing while the printer is switched off.
+void
+UPrinter::log(const UnicodeString &string, UBool nl) {
+  if(!_on) {
+    return;
+  }
+  // Work on a copy: getTerminatedBuffer() is not const.
+  UnicodeString copy(string);
+  log(copy.getTerminatedBuffer(), nl);
+}
+
+// Writes a NUL-terminated UChar string, followed by a newline when nl is set,
+// and flushes the output. Does nothing while the printer is switched off.
+void
+UPrinter::log(const UChar *string, UBool nl) {
+  if(!_on) {
+    return;
+  }
+  u_fprintf(out, "%S", string);
+  if(nl) {
+    u_fprintf(out, "\n");
+  }
+  u_fflush(out);
+}
+/*
+void
+UPrinter::log(const char *string, UBool nl) {
+ if(_on) {
+ u_fprintf(out, "%s", string);
+ if(nl) {
+ u_fprintf(out, "\n");
+ }
+ }
+}
+*/
+// Logs a line's name and, when the line has an expansion (expLen non-zero),
+// a "/" followed by the expansion string. A newline and flush follow when nl
+// is set. Does nothing while the printer is switched off.
+void
+UPrinter::log(const Line *line, UBool nl) {
+  if(!_on) {
+    return;
+  }
+
+  log(line->name);
+  if(line->expLen) {
+    log("/");
+    log(line->expansionString);
+  }
+
+  if(!nl) {
+    return;
+  }
+  u_fprintf(out, "\n");
+  u_fflush(out);
+}
+
+// printf-style logging: formats the arguments with the printer's locale and
+// logs the result when the printer is switched on.
+//
+// Formatting is bounded by the size of the local buffer (longer output is
+// truncated) instead of writing past its end.
+void UPrinter::log(const char *fmt, ...)
+{
+  UChar buffer[4000];
+  const int32_t capacity = (int32_t)(sizeof(buffer)/sizeof(buffer[0]));
+  va_list ap;
+
+  buffer[0] = 0; // stays an empty string if formatting writes nothing
+  va_start(ap, fmt);
+  /* sprintf it just to make sure that the information is valid */
+  u_vsnprintf(buffer, capacity - 1, _locale, fmt, ap);
+  va_end(ap);
+  buffer[capacity - 1] = 0; // guarantee termination when output was truncated
+  if( _on ) {
+    log(buffer);
+  }
+}
+
+// Switches logging on.
+void
+UPrinter::on(void) {
+ _on = TRUE;
+}
+
+// Switches logging off; the log() overloads check this flag before writing.
+void
+UPrinter::off(void) {
+ _on = FALSE;
+}
diff --git a/colprobe/uprinter.h b/colprobe/uprinter.h
new file mode 100755
index 0000000..c242104
--- /dev/null
+++ b/colprobe/uprinter.h
@@ -0,0 +1,51 @@
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*
+* File uprinter.h
+*
+* Modification History:
+*
+* Date Name Description
+* 03/18/2003 weiv Creation.
+*******************************************************************************
+*/
+
+#ifndef COLPROBE_UPRINTER_H
+#define COLPROBE_UPRINTER_H
+
+#include "line.h"
+
+#include "unicode/ustdio.h"
+#include "unicode/unistr.h"
+#include "unicode/ustring.h"
+
+
+// Small logging helper that writes Unicode text to a UFILE and can be
+// switched on and off.
+class UPrinter {
+ UFILE *out; // destination of all output
+ UChar buffer[256];
+ UBool _on; // when FALSE, the log() overloads write nothing
+ char _locale[256]; // locale name given at construction, used for formatting
+public:
+ // Logs to an already open FILE.
+ UPrinter(FILE *file, const char *locale, const char *encoding, UBool transliterateNonPrintable=TRUE);
+ // Logs to the named file, which is opened for writing.
+ UPrinter(const char *name, const char *locale, const char *encoding, UTransliterator *trans, UBool transliterateNonPrintable);
+ ~UPrinter();
+ void log(const UnicodeString &string, UBool nl = FALSE);
+ void log(const UChar *string, UBool nl = FALSE);
+ //void log(const char *string, UBool nl = FALSE);
+ void log(const Line *line, UBool nl = FALSE);
+ // printf-style overload; note that a plain char string argument is treated
+ // as a format string.
+ void log(const char *fmt, ...);
+ void off(void);
+ void on(void);
+ // Returns whether logging is currently switched on.
+ UBool isOn(void) {
+ return _on;
+ };
+};
+
+
+
+#endif // #ifndef COLPROBE_UPRINTER_H
diff --git a/colprobe/winGenCollData.pl b/colprobe/winGenCollData.pl
new file mode 100755
index 0000000..e77464c
--- /dev/null
+++ b/colprobe/winGenCollData.pl
@@ -0,0 +1,30 @@
+#!/usr/bin/perl -w
+
+# Runs colprobe for every locale listed in locale.txt (lines starting with '#'
+# are skipped) and stores each run's output in <platform>logs/<locale>_log.txt.
+#
+# Usage: winGenCollData.pl <platform>
+
+use strict;
+
+#my $localeMinusA = `locale -a`;
+my $localeMinusA = `cat locale.txt`;
+
+# Accept both CRLF and LF line endings; splitting on CRLF alone turns an
+# LF-only locale.txt into one single "locale".
+my @locales = split(/\r?\n/, $localeMinusA);
+my $locale;
+my $command;
+
+#my $commandPath = "~/src/icu/source/extra/colprobe/";
+my $commandPath = "c:/dev/0_icu/source/extra/colprobe/release/";
+
+
+my $platform = $ARGV[0];
+# Without a platform name the directories and commands below are meaningless.
+defined $platform or die "Usage: $0 <platform>\n";
+
+mkdir $platform."logs";
+mkdir $platform;
+
+foreach $locale (@locales) {
+    $_ = $locale;
+    chomp;
+    if(!/^\#/) { # && /\_/) {
+        $command = $commandPath."colprobe --platform $platform --ref $platform --output resb $locale >$platform"."logs/$locale"."_log.txt 2>&1";
+
+        print "$command\n";
+        `$command`;
+    }
+}
diff --git a/release/c/allLocaleTest.sh b/release/c/allLocaleTest.sh
new file mode 100755
index 0000000..cb705a5
--- /dev/null
+++ b/release/c/allLocaleTest.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# Runs "make check" once per locale reported by `locale -a`, appending the
+# output to <name>-locale.txt, where <name> is the first argument.
+# Expansions are quoted so names containing spaces or glob characters work.
+echo "Testing $1 in all locales"
+outfile="$1-locale.txt"
+echo "" > "$outfile"
+for loc in `locale -a`; do
+    echo "LC_ALL=$loc" >> "$outfile"
+    LC_ALL="$loc" make check >> "$outfile"
+done
+
+echo "Done testing $1 in all locales"
+
diff --git a/release/c/allTimezoneTest.sh b/release/c/allTimezoneTest.sh
new file mode 100755
index 0000000..3878453
--- /dev/null
+++ b/release/c/allTimezoneTest.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+echo "Testing $1 in all timezones"
+outfile=$1-timezone.txt
+echo "" > $outfile
+for timezone in `locate /usr/share/zoneinfo/|fgrep -v /right/|fgrep -v /posix/`; do
+timezone=${timezone#/usr/share/zoneinfo/}
+echo TZ=$timezone >> $outfile
+TZ=$timezone make check >> $outfile
+done
+
+echo "Done testing $1 in all timezones"
+
diff --git a/release/c/environmentTest.sh b/release/c/environmentTest.sh
new file mode 100755
index 0000000..5fa82be
--- /dev/null
+++ b/release/c/environmentTest.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+#
+# This test script enumerates all locales and all timezones installed on a
+# machine (usually Linux), and runs the existing ICU4C tests to make sure that
+# the tests pass. Not everyone is using and testing ICU4C in the en_US locale
+# with the Pacific timezone.
+#
+# The script stops if a test directory cannot be entered, so the test runs are
+# never spawned in the wrong directory.
+top_icu_dir=../../../icu
+release_tools_dir=../../../../tools/release/c
+cd $top_icu_dir/source/test/intltest || exit 1
+$release_tools_dir/allLocaleTest.sh intltest &
+$release_tools_dir/allTimezoneTest.sh intltest &
+cd ../iotest || exit 1
+$release_tools_dir/allLocaleTest.sh iotest &
+$release_tools_dir/allTimezoneTest.sh iotest &
+cd ../cintltst || exit 1
+$release_tools_dir/allLocaleTest.sh cintltst &
+$release_tools_dir/allTimezoneTest.sh cintltst &
+
+echo "All tests have been spawned."
+echo "Please wait while the tests run. This may take a while."
diff --git a/release/c/uconfigtest.sh b/release/c/uconfigtest.sh
new file mode 100755
index 0000000..7286495
--- /dev/null
+++ b/release/c/uconfigtest.sh
@@ -0,0 +1,284 @@
+#!/bin/sh
+# Exhaust(ive, ing) (Mean, Multi) (Test, Trouble)
+# Copyright (c) 2002-2006 IBM All Rights Reserved
+#
+
+# Builds ICU a whole lotta times and with different options
+# Set the options below and execute this script with the shell.
+
+# This script is checked into tools/release/c. It assumes that the
+# icu directory is at the same level as the tools directory. If this
+# is not the case, use the uconfigtest.local file to set the
+# SRC_DIR variable to point at the ICU source directory. You can
+# also use the uconfigtest.local file to override the BUILD_DIR
+# and ICUPLATFORM variables.
+
+
+#------------------- Find full path names -----------------------
+
+# check for uconfigtest.local
+if [ -f ./uconfigtest.local ]
+then
+ . ./uconfigtest.local
+fi
+
+# location of this script
+S=$(pwd)
+
+# Build root - tools/release/c/uconfigtest
+BUILD_DIR=${BUILD_DIR:-${S}/uconfigtest}
+
+# the runConfigureICU platform name
+ICUPLATFORM=${ICUPLATFORM:-LinuxRedHat}
+
+# Global Config options to use
+export COPTS=" --with-data-packaging=archive"
+
+# Global testing options to use
+export INTLTESTOPTS=-w
+export CINTLTEST_OPTS=-w
+# --- Probably will not need to modify the following variables ---
+
+# ICU directory is $S/../../../icu
+ICU=$(dirname $(dirname $(dirname ${S})))/icu
+
+# Source directory
+SRC_DIR=${SRC_DIR:-${ICU}/source}
+
+# ------------ End of config variables
+
+# Prepare uconfig.h
+UCONFIG_H=$SRC_DIR/common/unicode/uconfig.h
+if grep -q myconfig.h $UCONFIG_H ;
+then
+ echo "# $UCONFIG_H already contains our patch, no change"
+else
+ mv $UCONFIG_H ${UCONFIG_H}.orig
+ cat > $UCONFIG_H <<EOF
+#if defined(IN_UCONFIGTEST)
+#include "myconfig.h"
+#endif
+/* for uconfigtest.sh - you may REMOVE above this line */
+/* ----------------------------------------------------------- */
+EOF
+cat ${UCONFIG_H}.orig >> ${UCONFIG_H}
+ echo "# $UCONFIG_H updated"
+fi
+
+
+# Start, set a default name to start with in case something goes wrong
+
+export NAME=foo
+mkdir -p ${BUILD_DIR} ${BUILD_DIR}/times 2>/dev/null
+
+# Banner function - print a separator to split the output, showing NAME, CPPFLAGS, UCONFIGS and the build/install paths
+ban()
+{
+ echo
+ echo
+ echo "#- -----------------------$NAME------------- -#"
+ echo
+ echo "CPPFLAGS = $CPPFLAGS"
+ echo "UCONFIGS = $UCONFIGS"
+ echo
+ echo " build to ${BUILD_DIR}/${NAME} and install in ${BUILD_DIR}/I${NAME} "
+ echo
+}
+
+# Clean up the old tree before building again: remove the build and install dirs for $NAME, then recreate an empty build dir
+clean()
+{
+ echo cleaning ${BUILD_DIR}/${NAME} and ${BUILD_DIR}/I${NAME}
+ rm -rf ${BUILD_DIR}/I${NAME} ${BUILD_DIR}/${NAME}
+ mkdir -p ${BUILD_DIR}/${NAME}
+}
+
+# Run configure (out of source build) with a generated myconfig.h; configure's stdout AND stderr are captured in config.out
+config()
+{
+ mkdir -p ${BUILD_DIR}/${NAME} 2>/dev/null
+ cd ${BUILD_DIR}/${NAME}
+ mkdir emtinc 2>/dev/null
+
+ # myconfig.h - one "#define UCONFIG_<option> 1" per word in $UCONFIGS, wrapped in an include guard
+ cat > emtinc/myconfig.h <<EOF
+// NAME=${NAME}
+// UCONFIGS=${UCONFIGS}
+// CPPFLAGS=${CPPFLAGS}
+#ifndef _MYCONFIG_H
+#define _MYCONFIG_H
+
+EOF
+ for what in `echo $UCONFIGS`;
+ do
+ echo "#define UCONFIG_${what} 1" >> emtinc/myconfig.h
+ done
+ cat >> emtinc/myconfig.h <<EOF
+#endif
+EOF
+ CPPFLAGS="${CPPFLAGS} -DIN_UCONFIGTEST -I${BUILD_DIR}/${NAME}/emtinc"
+ echo "CPPFLAGS=\"$CPPFLAGS\" Configure $COPTS --srcdir=$SRC_DIR"
+ $SRC_DIR/runConfigureICU ${ICUPLATFORM} $COPTS --prefix=${BUILD_DIR}/I${NAME} --srcdir=$SRC_DIR > ${BUILD_DIR}/${NAME}/config.out 2>&1
+}
+
+# Do an actual build: time 'make all', 'install', 'install-local' and 'check' (timings go to ${BUILD_DIR}/times), then run the hdrtst check with the fresh install's bin on PATH
+bld()
+{
+##*## Stream filter to put 'NAME: ' in front of
+##*## every line:
+##*## . . . 2>&1 | tee -a ./bld.log | sed -e "s/^/${NAME}: /"
+ cd ${BUILD_DIR}/${NAME}
+ /usr/bin/time -o ${BUILD_DIR}/times/${NAME}.all make -k all
+ /usr/bin/time -o ${BUILD_DIR}/times/${NAME}.install make -k install
+ /usr/bin/time -o ${BUILD_DIR}/times/${NAME}.il make -k install-local
+ /usr/bin/time -o ${BUILD_DIR}/times/${NAME}.chk make -k check INTLTEST_OPTS=-w CINTLTST_OPTS=-w
+ PATH=${BUILD_DIR}/I${NAME}/bin:$PATH make -C ${BUILD_DIR}/${NAME}/test/hdrtst/ check
+}
+
+# Do a complete cycle for a run: print the banner, clean old trees, configure, then build/install/check
+doit()
+{
+ban ; clean ; config ; bld
+}
+
+# Set up the variables for convenience
+NO_COL="NO_COLLATION"
+NO_BRK="NO_BREAK_ITERATION"
+NO_FMT="NO_FORMATTING"
+NO_UCM="NO_LEGACY_CONVERSION"
+# Since NO_CONVERSION is only meant to allow the common and i18n
+# libraries to be built, we don't test this configuration.
+#NO_CNV="NO_CONVERSION"
+NO_FIO="NO_FILE_IO"
+NO_XLT="NO_TRANSLITERATION"
+NO_RGX="NO_REGULAR_EXPRESSIONS"
+JS_COL="ONLY_COLLATION"
+NO_NRM="NO_NORMALIZATION"
+NO_IDN="NO_IDNA"
+NO_SVC="NO_SERVICE"
+NO_MST="$NO_COL $NO_BRK $NO_FMT $NO_UCM $NO_FIO $NO_RGX $NO_XLT $NO_NRM $NO_IDN $NO_SVC"
+NO_ALL="$NO_MST $NO_SVC"
+
+# Now, come the actual test runs
+# Each one sets a NAME, and CPPFLAGS or other flags, and calls doit
+
+######################
+# NO_MST
+export NAME=NO_MST
+export UCONFIGS="$NO_MST"
+export CPPFLAGS=""
+doit
+######################
+
+######################
+# NO_RGX
+export NAME=NO_RGX
+export UCONFIGS="$NO_RGX"
+export CPPFLAGS=""
+doit
+######################
+
+######################
+# NO_COL
+export NAME=NO_COL
+export UCONFIGS="$NO_COL"
+export CPPFLAGS=""
+doit
+######################
+
+######################
+# NO_BRK
+export NAME=NO_BRK
+export UCONFIGS="$NO_BRK"
+export CPPFLAGS=""
+doit
+######################
+
+######################
+# NO_FMT
+export NAME=NO_FMT
+export UCONFIGS="$NO_FMT"
+export CPPFLAGS=""
+doit
+######################
+
+######################
+# NO_UCM
+export NAME=NO_UCM
+export UCONFIGS="$NO_UCM"
+export CPPFLAGS=""
+doit
+######################
+
+######################
+# NO_FIO
+export NAME=NO_FIO
+export UCONFIGS="$NO_FIO"
+export CPPFLAGS=""
+doit
+######################
+
+######################
+# NO_XLT
+export NAME=NO_XLT
+export UCONFIGS="$NO_XLT"
+export CPPFLAGS=""
+doit
+######################
+
+######################
+# NO_IDN
+export NAME=NO_IDN
+export UCONFIGS="$NO_IDN"
+export CPPFLAGS=""
+doit
+######################
+
+######################
+# NO_NRM
+export NAME=NO_NRM
+export UCONFIGS="$NO_NRM"
+export CPPFLAGS=""
+doit
+######################
+
+######################
+# NO_SVC
+export NAME=NO_SVC
+export UCONFIGS="$NO_SVC"
+export CPPFLAGS=""
+doit
+######################
+
+######################
+# JS_COL
+export NAME=JS_COL
+export UCONFIGS="$JS_COL"
+export CPPFLAGS=""
+doit
+######################
+
+######################
+# NO_ALL
+export NAME=NO_ALL
+export UCONFIGS="$NO_ALL"
+export CPPFLAGS=""
+doit
+######################
+
+######################
+# DEFAULT
+export NAME=DEFAULT
+export UCONFIGS=""
+export CPPFLAGS=""
+doit
+######################
+
+
+NAME=done
+ban
+echo "All builds finished! Times are in ${BUILD_DIR}/times"
+echo "There were errors if the following grep finds anything."
+echo "grep status ${BUILD_DIR}/times/*"
+grep status ${BUILD_DIR}/times/*
+
diff --git a/release/java/api-report.properties b/release/java/api-report.properties
new file mode 100644
index 0000000..913472f
--- /dev/null
+++ b/release/java/api-report.properties
@@ -0,0 +1,6 @@
+# api-report.properties contains properties for API report generation
+#update these values to current version and directories
+oldver=ICU 3.4
+olddir=/work/clean/icu-3-4/source/doc/xml/
+newver=ICU 3.6
+newdir=/work/clean/icu-3-6/source/doc/xml/
\ No newline at end of file
diff --git a/release/java/build.xml b/release/java/build.xml
new file mode 100644
index 0000000..8608713
--- /dev/null
+++ b/release/java/build.xml
@@ -0,0 +1,73 @@
+<!--
+/*
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+* This is the ant build file for ICU tools.
+*/
+-->
+<project name="API-Tools" default="main" basedir=".">
+ <target name="init">
+ <tstamp/>
+ <property name="src.dir" value="src"/>
+ <property name="build.dir" value="classes"/>
+ <property name="jar.file" value="cldr.jar"/>
+ <property name="jarSrc.file" value="cldrsrc.jar"/>
+ <property file="api-report.properties" />
+
+ <mkdir dir="${build.dir}"/>
+ <echo message="java home: ${java.home}"/>
+ <echo message="java version: ${java.version}"/>
+ <echo message="ant java version: ${ant.java.version}"/>
+ <echo message="${ant.version}"/>
+ <echo message="${basedir}"/>
+ </target>
+
+ <target name="doctools" depends="init" description="build StableAPI classes">
+ <javac includes="com/ibm/icu/dev/tools/docs/*.java"
+ excludes="**/CVS/**/*"
+ srcdir="${src.dir}"
+ destdir="${build.dir}"
+ source="1.4"
+ debug="on" deprecation="off"
+ encoding="ascii"/>
+ </target>
+ <target name="clean" depends="init" description="remove all build targets">
+ <delete dir="${build.dir}"/>
+ </target>
+ <target name="apireport" depends="doctools">
+ <java classname="com.ibm.icu.dev.tools.docs.StableAPI" fork="yes" failonerror="true">
+
+ <arg value = "--oldver"/>
+ <arg value = "${oldver}"/>
+
+ <arg value = "--olddir"/>
+ <arg value = "${olddir}"/>
+
+ <arg value = "--newver"/>
+ <arg value = "${newver}"/>
+
+ <arg value = "--newdir"/>
+ <arg value = "${newdir}"/>
+
+ <arg value = "--cppxslt"/>
+ <arg value = "${basedir}/src/com/ibm/icu/dev/tools/docs/dumpAllCppFunc.xslt"/>
+
+ <arg value = "--cxslt"/>
+ <arg value = "${basedir}/src/com/ibm/icu/dev/tools/docs/dumpAllCFunc.xslt"/>
+
+ <arg value = "--reportxslt"/>
+ <arg value = "${basedir}/src/com/ibm/icu/dev/tools/docs/genReport.xslt"/>
+
+ <arg value = "--resultfile"/>
+ <arg value = "${basedir}/APIChangeReport.html"/>
+
+ <classpath>
+ <pathelement location="${build.dir}"/>
+ <pathelement path="${java.class.path}/"/>
+ <pathelement path="."/>
+ </classpath>
+ </java>
+ </target>
+</project>
\ No newline at end of file
diff --git a/release/java/readme.txt b/release/java/readme.txt
new file mode 100644
index 0000000..78c4a2f
--- /dev/null
+++ b/release/java/readme.txt
@@ -0,0 +1,19 @@
+
+A tool to generate a report of API status changes between two ICU releases
+
+ To use the utility
+ 1. Generate the XML files
+ (put the two ICU releases on your machine ^_^ )
+ (generate 'Doxygen' file on Windows platform with Cygwin's help)
+ Edit the generated 'Doxygen' file under ICU4C source directory
+ a) GENERATE_XML = YES
+ b) Sync the ALIASES definition
+ (For example, copy the ALIASES definition from ICU 3.6
+ Doxygen file to ICU 3.4 Doxygen file.)
+ c) Generate the XML files
+ 2. Build the tool
+ ant doctools
+ 3. Edit the api-report.properties and change the values of oldver, olddir, newver, newdir
+ 4. Run the tool to generate the report
+ ant apireport
+
\ No newline at end of file
diff --git a/release/java/src/com/ibm/icu/dev/tools/docs/StableAPI.java b/release/java/src/com/ibm/icu/dev/tools/docs/StableAPI.java
new file mode 100644
index 0000000..f7667f1
--- /dev/null
+++ b/release/java/src/com/ibm/icu/dev/tools/docs/StableAPI.java
@@ -0,0 +1,442 @@
+/*
+ **********************************************************************
+ * Copyright (c) 2006, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ * Created on 2006-7-24
+ */
+package com.ibm.icu.dev.tools.docs;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.lang.reflect.Field;
+import java.util.GregorianCalendar;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.Result;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMResult;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.apache.crimson.jaxp.DocumentBuilderFactoryImpl;
+import org.apache.xerces.parsers.DOMParser;
+import org.apache.xpath.XPathAPI;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.InputSource;
+
+/**
+ A utility to report the status change between two ICU releases
+
+To use the utility
+1. Generate the XML files
+ (put the two ICU releases on your machine ^_^ )
+ (generate 'Doxygen' file on Windows platform with Cygwin's help)
+ Edit the generated 'Doxygen' file under ICU4C source directory
+ a) GENERATE_XML = YES
+ b) Sync the ALIASES definition
+ (For example, copy the ALIASES definition from ICU 3.6
+ Doxygen file to ICU 3.4 Doxygen file.)
+ c) Generate the XML files
+2. Build the tool
+ Download Apache Xerces Java Parser
+ Build this file with the library
+3. Edit the api-report.properties file and change the values (oldver, olddir, newver, newdir) to match your real configuration
+4. Run the tool to generate the report.
+
+ * @author Raymond Yang
+ */
+public class StableAPI {
+
+ private String leftVer;
+ private String leftDir;
+// private String leftStatus;
+
+ private String rightVer;
+ private String rightDir;
+// private String rightStatus;
+
+ private String dumpCppXslt;
+ private String dumpCXslt;
+ private String reportXsl;
+ private String resultFile;
+
+ final private static String nul = "None";
+
+ public static void main(String[] args) throws FileNotFoundException, TransformerException, ParserConfigurationException {
+
+ StableAPI t = new StableAPI();
+ t.parseArgs(args);
+ Set full = new HashSet();
+
+ Set setCpp = t.getFullList(t.dumpCppXslt);
+ full.addAll(setCpp);
+
+ Set setC = t.getFullList(t.dumpCXslt);
+ full.addAll(setC);
+
+ Node fullList = t.setToNode(full);
+// t.dumpNode(fullList,"");
+
+ t.reportSelectedFun(fullList);
+ System.out.println("Done. Please check " + t.resultFile);
+ }
+
+
+ private void parseArgs(String[] args){
+ for (int i = 0; i < args.length; i++) {
+ String arg = args[i];
+ if (arg == null || arg.length() == 0) {
+ continue;
+ }
+ if (arg.equals("--help") ) {
+ printUsage();
+ } else if (arg.equals("--oldver") ) {
+ leftVer = args[++i];
+ } else if (arg.equals("--olddir") ) {
+ leftDir = args[++i];
+ } else if (arg.equals("--newver")) {
+ rightVer = args[++i];
+ } else if (arg.equals("--newdir")) {
+ rightDir = args[++i];
+ } else if (arg.equals("--cxslt") ) {
+ dumpCXslt = args[++i];
+ } else if (arg.equals("--cppxslt") ) {
+ dumpCppXslt = args[++i];
+ } else if (arg.equals("--reportxslt") ) {
+ reportXsl = args[++i];
+ } else if (arg.equals("--resultfile")) {
+ resultFile = args[++i];
+ } else {
+ System.out.println("Unknown option: "+arg);
+ printUsage();
+ }
+ }
+ }
+
+ private static void printUsage(){
+ System.out.println("Usage: StableAPI option* target*");
+ System.out.println();
+ System.out.println("Options:");
+ System.out.println(" --help Print this text");
+ System.out.println(" --oldver Version of old version of ICU");
+ System.out.println(" --olddir Directory that contains xml docs of old version");
+ System.out.println(" --newver Version of new version of ICU");
+ System.out.println(" --newdir Directory that contains xml docs of new version");
+ System.out.println(" --cxslt XSLT file for C docs");
+ System.out.println(" --cppxslt XSLT file for C++ docs");
+ System.out.println(" --reportxslt XSLT file for report docs");
+ System.out.println(" --resultfile Output file");
+ System.exit(-1);
+ }
+
+ static String getAttr(Node node, String attrName){
+ return node.getAttributes().getNamedItem(attrName).getNodeValue();
+ }
+
+ static String getAttr(NamedNodeMap attrList, String attrName){
+ return attrList.getNamedItem(attrName).getNodeValue();
+ }
+
+ static class Fun {
+ public String prototype;
+ public String id;
+ public String status;
+ public String file;
+ public boolean equals(Fun right){
+ return this.prototype.equals(right.prototype);
+ }
+ static Fun fromXml(Node n){
+ Fun f = new Fun();
+ f.prototype = getAttr(n, "prototype");
+ f.id = getAttr(n, "id");
+ f.status = getAttr(n, "status");
+ f.file = getAttr(n, "file");
+ f.purifyPrototype();
+ f.purifyFile();
+ return f;
+ }
+
+ private void purifyFile(){
+ int i = file.lastIndexOf("/");
+ file = i == -1 ? file : file.substring(i+1);
+ }
+
+ /**
+ * Special cases:
+ *
+ * Remove the status attribute embedded in the C prototype
+ *
+ * Remove the virtual keyword in Cpp prototype
+ */
+ private void purifyPrototype(){
+ //refer to 'umachine.h'
+ String statusList[] = {"U_CAPI", "U_STABLE", "U_DRAFT", "U_DEPRECATED", "U_OBSOLETE", "U_INTERNAL", "virtual"};
+ for (int i = 0; i < statusList.length; i++) {
+ String s = statusList[i];
+ prototype = prototype.replaceAll(s,"");
+ prototype = prototype.trim();
+ }
+ prototype = prototype.trim();
+ }
+// private Element toXml(Document doc){
+// Element ele = doc.createElement("func");
+// ele.setAttribute("prototype", prototype);
+// ele.setAttribute("id", id);
+// ele.setAttribute("status", status);
+// return ele;
+// }
+ }
+
+ static class JoinedFun {
+ public String prototype;
+ public String leftRefId;
+ public String leftStatus;
+ public String leftFile;
+ public String rightRefId;
+ public String rightStatus;
+ public String rightFile;
+
+ static JoinedFun fromLeftFun(Fun left){
+ JoinedFun u = new JoinedFun();
+ u.prototype = left.prototype;
+ u.leftRefId = left.id;
+ u.leftStatus = left.status;
+ u.leftFile = left.file;
+ u.rightRefId = nul;
+ u.rightStatus = nul;
+ u.rightFile = nul;
+ return u;
+ }
+
+ static JoinedFun fromRightFun(Fun right){
+ JoinedFun u = new JoinedFun();
+ u.prototype = right.prototype;
+ u.leftRefId = nul;
+ u.leftStatus = nul;
+ u.leftFile = nul;
+ u.rightRefId = right.id;
+ u.rightStatus = right.status;
+ u.rightFile = right.file;
+ return u;
+ }
+
+ static JoinedFun fromTwoFun(Fun left, Fun right){
+ if (!left.equals(right)) throw new Error();
+ JoinedFun u = new JoinedFun();
+ u.prototype = left.prototype;
+ u.leftRefId = left.id;
+ u.leftStatus = left.status;
+ u.leftFile = left.file;
+ u.rightRefId = right.id;
+ u.rightStatus = right.status;
+ u.rightFile = right.file;
+ return u;
+ }
+
+ Element toXml(Document doc){
+ Element ele = doc.createElement("func");
+ ele.setAttribute("prototype", prototype);
+// ele.setAttribute("leftRefId", leftRefId);
+
+ ele.setAttribute("leftStatus", leftStatus);
+// ele.setAttribute("rightRefId", rightRefId);
+ ele.setAttribute("rightStatus", rightStatus);
+
+
+// String f = rightRefId.equals(nul) ? leftRefId : rightRefId;
+// int tail = f.indexOf("_");
+// f = tail != -1 ? f.substring(0, tail) : f;
+// f = f.startsWith("class") ? f.replaceFirst("class","") : f;
+ String f = rightFile.equals(nul) ? leftFile : rightFile;
+ ele.setAttribute("file", f);
+ return ele;
+ }
+ }
+
+ TransformerFactory transFac = TransformerFactory.newInstance();
+
+ private void reportSelectedFun(Node joinedNode) throws FileNotFoundException, TransformerException{
+ Transformer report = transFac.newTransformer(new DOMSource(getDocument(reportXsl)));
+// report.setParameter("leftStatus", leftStatus);
+ report.setParameter("leftVer", leftVer);
+// report.setParameter("rightStatus", rightStatus);
+ report.setParameter("rightVer", rightVer);
+ report.setParameter("dateTime", new GregorianCalendar().getTime());
+ report.setParameter("nul", nul);
+
+ DOMSource src = new DOMSource(joinedNode);
+
+ Result res = new StreamResult(new File(resultFile));
+// DOMResult res = new DOMResult();
+ report.transform(src, res);
+// dumpNode(res.getNode(),"");
+ }
+
+ private Set getFullList(String dumpXsltFile) throws FileNotFoundException, TransformerException, ParserConfigurationException{
+ // prepare transformer
+ Transformer transformer = transFac.newTransformer(new DOMSource(getDocument(dumpXsltFile)));
+// Node joinedNode = null;
+
+ DOMSource leftIndex = new DOMSource(getDocument(leftDir + "index.xml"));
+ DOMResult leftResult = new DOMResult();
+ transformer.setParameter("docFolder", leftDir);
+ transformer.transform(leftIndex, leftResult);
+ Node leftList = XPathAPI.selectSingleNode(leftResult.getNode(),"/list");
+// dumpNode(leftList,"");
+
+ DOMSource rightIndex = new DOMSource(getDocument(rightDir + "index.xml"));
+ DOMResult rightResutl = new DOMResult();
+ transformer.setParameter("docFolder", rightDir);
+ transformer.transform(rightIndex, rightResutl);
+ Node rightList = XPathAPI.selectSingleNode(rightResutl.getNode(),"/list");
+// dumpNode(rightList,"");
+
+
+ Set leftSet = nodeToSet(leftList);
+ Set rightSet = nodeToSet(rightList);
+ Set joined = fullJoin(leftSet, rightSet);
+ return joined;
+// joinedNode = setToNode(joined);
+// dumpNode(joinedNode,"");
+// return joinedNode;
+ }
+
+ /**
+ * @param node
+ * @return Set<Fun>
+ */
+ private Set nodeToSet(Node node){
+ Set s = new HashSet();
+ NodeList list = node.getChildNodes();
+ for (int i = 0; i < list.getLength(); i++) {
+ Node n = list.item(i);
+ s.add(Fun.fromXml(n));
+ }
+ return s;
+ }
+
+ /**
+ * @param set Set<JoinedFun>
+ * @return
+ * @throws ParserConfigurationException
+ */
+ private Node setToNode(Set set) throws ParserConfigurationException{
+ DocumentBuilderFactory dbf = DocumentBuilderFactoryImpl.newInstance();
+ Document doc = dbf.newDocumentBuilder().newDocument();
+ Element root = doc.createElement("list");
+ doc.appendChild(root);
+ for (Iterator iter = set.iterator(); iter.hasNext();) {
+ JoinedFun fun = (JoinedFun) iter.next();
+ root.appendChild(fun.toXml(doc));
+ }
+ return doc;
+ }
+
+ /**
+ * full-join two Set on 'prototype'
+ *
+ * @param left Set<Fun>
+ * @param right Set<Fun>
+ * @return Set<JoinedFun>
+ */
+ private static Set fullJoin(Set left, Set right){
+
+ Set joined = new HashSet(); //Set<JoinedFun>
+ Set common = new HashSet(); //Set<Fun>
+ for (Iterator iter1 = left.iterator(); iter1.hasNext();) {
+ Fun f1 = (Fun) iter1.next();
+// if (f1.prototype.matches(".*Transliterator::.*")){
+// System.err.println("left: " + f1.prototype);
+// System.err.println("left: " + f1.status);
+// }
+ for (Iterator iter2 = right.iterator(); iter2.hasNext();) {
+ Fun f2 = (Fun) iter2.next();
+// if ( f1.prototype.matches(".*filteredTransliterate.*")
+// && f2.prototype.matches(".*filteredTransliterate.*")){
+// System.err.println("right: " + f2.prototype);
+// System.err.println("right: " + f2.status);
+// System.err.println(f1.prototype.equals(f2.prototype));
+// System.err.println(f1.prototype.getBytes()[0]);
+// System.err.println(f2.prototype.getBytes()[0]);
+// }
+ if (f1.equals(f2)) {
+ // should add left item to common set
+ // since we will remove common items with left set later
+ common.add(f1);
+ joined.add(JoinedFun.fromTwoFun(f1, f2));
+ right.remove(f2);
+ break;
+ }
+ }
+ }
+
+ for (Iterator iter = common.iterator(); iter.hasNext();) {
+ Fun f = (Fun) iter.next();
+ left.remove(f);
+ }
+
+ for (Iterator iter = left.iterator(); iter.hasNext();) {
+ Fun f = (Fun) iter.next();
+ joined.add(JoinedFun.fromLeftFun(f));
+ }
+
+ for (Iterator iter = right.iterator(); iter.hasNext();) {
+ Fun f = (Fun) iter.next();
+ joined.add(JoinedFun.fromRightFun(f));
+ }
+ return joined;
+ }
+
+ private static void dumpNode(Node n, String pre){
+ pre += " ";
+ System.out.println(pre + "<" + n.getNodeName() + ">");
+ //dump attribute
+ NamedNodeMap attr = n.getAttributes();
+ if (attr!=null){
+ for (int i = 0; i < attr.getLength(); i++) {
+ System.out.println(attr.item(i));
+ }
+ }
+
+ // dump value
+ String v = pre + n.getNodeValue();
+// if (n.getNodeType() == Node.TEXT_NODE)
+ System.out.println(v);
+
+ // dump sub nodes
+ NodeList nList = n.getChildNodes();
+ for (int i = 0; i < nList.getLength(); i++) {
+ Node ln = nList.item(i);
+ dumpNode(ln, pre + " ");
+ }
+ System.out.println(pre + "</" + n.getNodeName() + ">");
+ }
+
+ /** Parses the named XML file into a DOM Document and closes the file afterwards; parse errors are printed, not rethrown. */
+ private static Document getDocument(String name) throws FileNotFoundException{
+     FileInputStream fis = new FileInputStream(name);
+     DOMParser parser = new DOMParser();
+     try {
+         parser.parse(new InputSource(fis));
+     } catch (Exception e) {
+         e.printStackTrace();
+     } finally {
+         // The stream used to be leaked; release the file handle whether or not parsing succeeded.
+         try { fis.close(); } catch (java.io.IOException ignored) { /* nothing useful to do on close failure */ }
+     }
+     return parser.getDocument();
+ }
+
+}
diff --git a/release/java/src/com/ibm/icu/dev/tools/docs/dumpAllCFunc.xslt b/release/java/src/com/ibm/icu/dev/tools/docs/dumpAllCFunc.xslt
new file mode 100644
index 0000000..30d3bcf
--- /dev/null
+++ b/release/java/src/com/ibm/icu/dev/tools/docs/dumpAllCFunc.xslt
@@ -0,0 +1,35 @@
+<!--
+/*
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+* This is an XSLT stylesheet used by the ICU StableAPI report tool.
+*/
+-->
+<!--
+ List all c functions generated from the 'index.xml'
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+ <xsl:param name="docFolder" />
+
+ <xsl:template match="/">
+ <list>
+ <xsl:variable name="files_node" select="/doxygenindex/compound[@kind='file']/@refid" />
+ <xsl:for-each select="$files_node">
+ <xsl:variable name="file" select="concat($docFolder, . , '.xml')" />
+ <xsl:variable name="funcs_node" select="document($file)/doxygen/compounddef/sectiondef/memberdef[@prot='public'][@kind='function']" />
+ <xsl:for-each select="$funcs_node">
+ <cppfunc>
+ <xsl:copy-of select="@id" />
+ <xsl:attribute name="status"><xsl:value-of select="detaileddescription/para/xrefsect/xreftitle/text()"/></xsl:attribute>
+ <xsl:attribute name="prototype"><xsl:value-of select="concat(definition/text(), argsstring/text())" /></xsl:attribute>
+ <xsl:copy-of select="location/@file" />
+ </cppfunc>
+ </xsl:for-each>
+ </xsl:for-each>
+ </list>
+ </xsl:template>
+</xsl:stylesheet>
+
+
diff --git a/release/java/src/com/ibm/icu/dev/tools/docs/dumpAllCppFunc.xslt b/release/java/src/com/ibm/icu/dev/tools/docs/dumpAllCppFunc.xslt
new file mode 100644
index 0000000..b72ade5
--- /dev/null
+++ b/release/java/src/com/ibm/icu/dev/tools/docs/dumpAllCppFunc.xslt
@@ -0,0 +1,35 @@
+<!--
+/*
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+* This is an XSLT stylesheet used by the ICU StableAPI report tool.
+*/
+-->
+<!--
+ List all cpp public functions generated from the 'index.xml'
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+ <xsl:param name="docFolder" />
+
+ <xsl:template match="/">
+ <list>
+ <xsl:variable name="files_node" select="/doxygenindex/compound[@kind='class']/@refid" />
+ <xsl:for-each select="$files_node">
+ <xsl:variable name="file" select="concat($docFolder, . , '.xml')" />
+ <xsl:variable name="funcs_node" select="document($file)/doxygen/compounddef/sectiondef/memberdef[@prot='public'][@kind='function']" />
+ <xsl:for-each select="$funcs_node">
+ <cppfunc>
+ <xsl:copy-of select="@id" />
+ <xsl:attribute name="status"><xsl:value-of select="detaileddescription/para/xrefsect/xreftitle/text()"/></xsl:attribute>
+ <xsl:attribute name="prototype"><xsl:value-of select="concat(definition/text(), argsstring/text())" /></xsl:attribute>
+ <xsl:copy-of select="location/@file" />
+ </cppfunc>
+ </xsl:for-each>
+ </xsl:for-each>
+ </list>
+ </xsl:template>
+</xsl:stylesheet>
+
+
diff --git a/release/java/src/com/ibm/icu/dev/tools/docs/genReport.xslt b/release/java/src/com/ibm/icu/dev/tools/docs/genReport.xslt
new file mode 100644
index 0000000..324a50e
--- /dev/null
+++ b/release/java/src/com/ibm/icu/dev/tools/docs/genReport.xslt
@@ -0,0 +1,102 @@
+<!--
+/*
+*******************************************************************************
+* Copyright (C) 2006, International Business Machines Corporation and *
+* others. All Rights Reserved. *
+*******************************************************************************
+* This is an XSLT stylesheet used by the ICU StableAPI report tool.
+*/
+-->
+<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
+<!--
+ <xsl:param name="leftStatus" />
+ <xsl:param name="rightStatus" />
+-->
+ <xsl:param name="leftVer" />
+ <xsl:param name="rightVer" />
+ <xsl:param name="dateTime" />
+ <xsl:param name="nul" />
+
+
+ <xsl:template match="/">
+ <html>
+ <head>
+ <title>ICU4C API Comparison: <xsl:value-of select="$leftVer"/> with <xsl:value-of select="$rightVer" /> </title>
+ </head>
+
+ <body>
+
+ <h1>ICU4C API Comparison: <xsl:value-of select="$leftVer"/> with <xsl:value-of select="$rightVer" /> </h1>
+ <hr/>
+
+ <h2>Removed from <xsl:value-of select="$leftVer"/> </h2>
+ <xsl:call-template name="genTable">
+ <xsl:with-param name="nodes" select="/list/func[@rightStatus=$nul]"/>
+ </xsl:call-template>
+ <P/><hr/>
+
+ <h2>Deprecated or Obsoleted in <xsl:value-of select="$rightVer" /></h2>
+ <xsl:call-template name="genTable">
+ <xsl:with-param name="nodes" select="/list/func[(@rightStatus='Deprecated' and @leftStatus!='Deprecated') or (@rightStatus='Obsolete' and @leftStatus!='Obsolete')]"/>
+ </xsl:call-template>
+ <P/><hr/>
+
+ <h2>Changed in <xsl:value-of select="$rightVer" /> (old, new)</h2>
+ <xsl:call-template name="genTable">
+ <xsl:with-param name="nodes" select="/list/func[(@leftStatus != $nul) and (@rightStatus != $nul) and (@leftStatus != @rightStatus)]"/>
+ </xsl:call-template>
+ <P/><hr/>
+
+ <h2>Promoted to stable in <xsl:value-of select="$rightVer" /></h2>
+ <xsl:call-template name="genTable">
+ <xsl:with-param name="nodes" select="/list/func[@leftStatus != 'Stable' and @rightStatus = 'Stable']"/>
+ </xsl:call-template>
+ <P/><hr/>
+
+ <h2>Added in <xsl:value-of select="$rightVer" /></h2>
+ <xsl:call-template name="genTable">
+ <xsl:with-param name="nodes" select="/list/func[@leftStatus=$nul]"/>
+ </xsl:call-template>
+ <P/><hr/>
+<!--
+
+-->
+
+ <p><i><font size="-1">Contents generated by StableAPI tool on <xsl:value-of select="$dateTime" /><br/>Copyright (C) 2006, International Business Machines Corporation, All Rights Reserved.</font></i></p>
+ </body>
+ </html>
+ </xsl:template>
+
+ <xsl:template name="genTable">
+ <xsl:param name="nodes" />
+ <table BORDER="1">
+ <THEAD>
+ <tr>
+ <th> <xsl:value-of select="'File'" /> </th>
+ <th> <xsl:value-of select="'Public API Prototype'" /> </th>
+ <th> <xsl:value-of select="$leftVer" /> </th>
+ <th> <xsl:value-of select="$rightVer" /> </th>
+ </tr>
+ </THEAD>
+
+ <xsl:for-each select="$nodes">
+ <xsl:sort select="@file" />
+ <tr>
+ <xsl:attribute name="STYLE">
+ <xsl:if test ="@leftStatus = 'Stable'">
+ <xsl:value-of select="'color: red'" />
+ </xsl:if>
+ </xsl:attribute>
+ <td> <xsl:value-of select="@file" /> </td>
+ <td> <xsl:value-of select="@prototype" /> </td>
+ <td> <xsl:value-of select="@leftStatus" /> </td>
+ <td> <xsl:value-of select="@rightStatus" /> </td>
+ </tr>
+ </xsl:for-each>
+ </table>
+ </xsl:template>
+</xsl:stylesheet>
+
+
+
+
diff --git a/unicodetools/com/ibm/rbm/.cvsignore b/unicodetools/com/ibm/rbm/.cvsignore
new file mode 100644
index 0000000..a65b417
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/.cvsignore
@@ -0,0 +1 @@
+lib
diff --git a/unicodetools/com/ibm/rbm/Bundle.java b/unicodetools/com/ibm/rbm/Bundle.java
new file mode 100644
index 0000000..71a2cd3
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/Bundle.java
@@ -0,0 +1,470 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.Writer;
+import java.util.*;
+
+import com.ibm.rbm.gui.RBManagerGUI;
+
+/**
+ * A class representing the entire Bundle of Resources for a particular language, country, variant.
+ *
+ * @author Jared Jackson
+ * @see com.ibm.rbm.RBManager
+ */
+public class Bundle {
+
+ /**
+ * The following public class variables reflect the various properties that can be included as
+ * meta-data in a resource bundle formatted by RBManager
+ */
+ public String name;
+ /**
+ * The encoding of the bundle (e.g. 'en', 'en_US', 'de', etc.)
+ */
+ public String encoding;
+ /**
+ * A descriptor of the language in the encoding (e.g. English, German, etc.)
+ */
+ public String language;
+ /**
+ * A descriptor of the country in the encoding (e.g. US, Canada, Great Britain)
+ */
+ public String country;
+ /**
+ * The descriptor of the variant in the encoding (e.g. Euro, Irish, etc.)
+ */
+ public String variant;
+ /**
+ * A comment concerning the bundle
+ */
+ public String comment;
+ /**
+ * The name of the person responsible for the managerment of this bundle
+ */
+ public String manager;
+
+ private TreeSet groups; // A vector of groups of NLS items, the key is the group name
+
+ /**
+ * A hashtable of all of the items in the bundle, hashed according to their
+ * NLS key.
+ */
+
+ public Hashtable allItems; // A hashtable of all items in the file, the key is the NLS key
+
+ private TreeSet untranslatedItems; // A vector of all items which are untranslated
+
+ /**
+ * A vector containing all of the items which are duplicates (based on the NLS keys)
+ * of items previously declared in the bundle.
+ */
+
+ public Vector duplicates; // A vector of items which are duplicates (NLS Keys) of previous items
+
+ /**
+ * Constructor for creating an empty bundle with a given encoding
+ */
+
+ public Bundle(String encoding) {
+ this.encoding = encoding;
+ language = null;
+ country = null;
+ variant = null;
+ comment = null;
+ manager = null;
+ groups = new TreeSet(new Comparator() {
+ public boolean equals(Object o) { return false; }
+
+ public int compare(Object o1, Object o2) {
+ if (!(o1 instanceof BundleGroup) || !(o2 instanceof BundleGroup))
+ return 0;
+ BundleGroup g1 = (BundleGroup)o1;
+ BundleGroup g2 = (BundleGroup)o2;
+ return g1.getName().compareTo(g2.getName());
+ }
+ });
+
+ untranslatedItems = new TreeSet(new Comparator() {
+ public boolean equals(Object o) { return false; }
+
+ public int compare(Object o1, Object o2) {
+ if (!(o1 instanceof BundleItem) || !(o2 instanceof BundleItem)) return 0;
+ BundleItem i1 = (BundleItem)o1;
+ BundleItem i2 = (BundleItem)o2;
+ return i1.getKey().compareTo(i2.getKey());
+ }
+ });
+
+ duplicates = new Vector();
+ allItems = new Hashtable();
+ }
+
+ /**
+ * Encodings are of the form -> language_country_variant <- (for example: "en_us_southern").
+ * This method returns the language encoding string, or null if it is not specified
+ */
+
+ public String getLanguageEncoding() {
+ if (encoding == null)
+ return null;
+ if (encoding.indexOf("_") >= 0)
+ return encoding.substring(0,encoding.indexOf("_"));
+ return encoding.trim();
+ }
+
+ /**
+ * Encodings are of the form -> language_country_variant <- (for example: "en_us_southern").
+ * This method returns the country encoding string, or null if it is not specified
+ */
+
+ public String getCountryEncoding() {
+ if (encoding == null || encoding.indexOf("_") < 0)
+ return null;
+ // Strip off the language
+ String workStr = encoding.substring(encoding.indexOf("_")+1,encoding.length());
+ if (workStr.indexOf("_") >= 0)
+ return workStr.substring(0,encoding.indexOf("_"));
+ return workStr.trim();
+ }
+
+ /**
+ * Encodings are of the form -> language_country_variant <- (for example: "en_us_southern").
+ * This method returns the variant encoding string, or null if it is not specified
+ */
+
+ public String getVariantEncoding() {
+ if (encoding == null || encoding.indexOf("_") < 0)
+ return null;
+ // Strip off the language
+ String workStr = encoding.substring(encoding.indexOf("_")+1,encoding.length());
+ if (workStr == null || workStr.length() < 1 || workStr.indexOf("_") < 0)
+ return null;
+ // Strip off the country
+ workStr = workStr.substring(encoding.indexOf("_")+1, workStr.length());
+ return workStr.trim();
+ }
+
+ /**
+ * Returns the UntranslatedItems as a vector. I should find where this happens and stop it.
+ */
+
+ public Vector getUntranslatedItemsAsVector() {
+ Iterator iter = untranslatedItems.iterator();
+ Vector v = new Vector();
+ while (iter.hasNext())
+ v.addElement(iter.next());
+ return v;
+ }
+
+ /**
+ * Checks all items in the untranslated items set. If they belong to a group whose name
+ * matches the passed in name, then they are removed.
+ */
+
+ public void removeUntranslatedItemsByGroup(String groupName) {
+ Iterator iter = untranslatedItems.iterator();
+ try {
+ while(iter.hasNext()) {
+ BundleItem item = null;
+ item = (BundleItem)iter.next();
+ if (item != null && item.getParentGroup().getName().equals(groupName)) {
+ removeUntranslatedItem(item.getKey());
+ }
+ }
+ } catch (Exception e) {
+ RBManagerGUI.debugMsg(e.getMessage());
+ }
+ }
+
+ /**
+ * Checks to see if an item of the given key name exists in the set of untranslated items. If
+ * it does exist, then it is removed.
+ */
+
+ public void removeUntranslatedItem(String name) {
+ Iterator iter = untranslatedItems.iterator();
+ while (iter.hasNext()) {
+ BundleItem item = (BundleItem)iter.next();
+ if (item.getKey().equals(name)) {
+ untranslatedItems.remove(item);
+ break;
+ }
+ }
+ }
+
+ /**
+ * Returns the boolean of wether a group of a given name exists in the bundle
+ */
+
+ public boolean hasGroup(String groupName) {
+ Iterator iter = groups.iterator();
+ while (iter.hasNext()) {
+ BundleGroup group = (BundleGroup)iter.next();
+ if (group.getName().equals(groupName))
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Creates a group of the given name and optionally associates a comment with
+ * that group.
+ */
+
+ public void addBundleGroup(String groupName, String groupComment) {
+ BundleGroup bg = new BundleGroup(this, groupName);
+ bg.setComment(groupComment);
+ addBundleGroup(bg);
+ }
+
+ /**
+ * Removes the group of the given name if it exists in the bundle
+ */
+
+ public void removeGroup(String groupName) {
+ Iterator iter = groups.iterator();
+ while (iter.hasNext()) {
+ BundleGroup tempGroup = (BundleGroup)iter.next();
+ if (tempGroup.getName().equals(groupName)) {
+ groups.remove(tempGroup);
+ break;
+ }
+ }
+ // Remove the items from the untanslated items
+ removeUntranslatedItemsByGroup(groupName);
+
+ // Loop through all Items
+ Enumeration enum = allItems.elements();
+ while(enum.hasMoreElements()) {
+ BundleItem item = (BundleItem)enum.nextElement();
+ if (item.getParentGroup().getName().equals(groupName)) {
+ allItems.remove(item);
+ }
+ }
+ }
+
+ /**
+ * Removes a single resource item from the bundle
+ */
+
+ public void removeItem(String key) {
+ Object o = allItems.get(key);
+ if (o != null) {
+ BundleItem item = (BundleItem)o;
+ // Remove from allItems Hashtable
+ allItems.remove(key);
+ // Remove from item's group
+ if (item.getParentGroup() != null) {
+ BundleGroup group = item.getParentGroup();
+ group.removeBundleItem(key);
+ }
+ // Remove from untranslatedItems Hashtable
+ removeUntranslatedItem(key);
+ }
+ }
+
+ /**
+ * Attempts to add a BundleItem to the untranslatedItems. The addition will fail in two cases: One, if
+ * the item does not all ready belong to this Bundle, and Two, if the item is all ready in the set of
+ * untranslated items.
+ */
+
+ public void addUntranslatedItem(BundleItem item) {
+ if (item.getParentGroup().getParentBundle() != this)
+ return;
+ // Remove it if it exists.
+ if (untranslatedItems.contains(item)) {
+ untranslatedItems.remove(item);
+ }
+ untranslatedItems.add(item);
+ }
+
+ /**
+ * Returns the number of items currently marked as untranslated
+ */
+
+ public int getUntranslatedItemsSize() {
+ return untranslatedItems.size();
+ }
+
+ /**
+ * Returns the indexth untranslated item
+ */
+
+ public BundleItem getUntranslatedItem(int index) {
+ if (index >= untranslatedItems.size())
+ return null;
+ Iterator iter = untranslatedItems.iterator();
+ for (int i=0; i < index; i++)
+ iter.next();
+ return (BundleItem)iter.next();
+ }
+
+ /**
+ * Return the various resource bundle groups stored in a Vector collection.
+ */
+
+ public Vector getGroupsAsVector() {
+ Vector v = new Vector();
+ Iterator iter = groups.iterator();
+ while (iter.hasNext()) {
+ BundleGroup group = (BundleGroup)iter.next();
+ v.addElement(group);
+ }
+ return v;
+ }
+
+ /**
+ * Returns the number of groups in the bundle.
+ */
+
+ public int getGroupCount() {
+ return groups.size();
+ }
+
+ /**
+ * Returns a bundle group given a certain index.
+ */
+
+ public BundleGroup getBundleGroup(int index) {
+ if (index >= getGroupCount())
+ return null;
+ Iterator iter = groups.iterator();
+ for (int i=0; i < index; i++)
+ iter.next();
+ return (BundleGroup)iter.next();
+ }
+
+ /**
+ * Looks for a bundle group of a given name within a bundle and
+ * returns it if found.
+ */
+
+ public BundleGroup getBundleGroup(String groupName) {
+ Iterator iter = groups.iterator();
+ while(iter.hasNext()) {
+ BundleGroup group = (BundleGroup)iter.next();
+ if (group.getName().equals(groupName))
+ return group;
+ }
+ return null;
+ }
+
+ /**
+ * Looks up and returns a bundle item stored in the bundle based on its
+ * NLS lookup key.
+ */
+
+ public BundleItem getBundleItem(String key) {
+ return (BundleItem)allItems.get(key);
+ }
+
+ /**
+ * One group is created for all bundles called 'Ungrouped Items'. This is the bundle
+ * group in which bundle items are placed that are not specifically grouped in the
+ * resource bundle file. This method returns that bundle group.
+ */
+
+ public BundleGroup getUngroupedGroup() {
+ return getBundleGroup("Ungrouped Items");
+ }
+
+ /**
+ * Add a bundle group to the bundle
+ */
+
+ public void addBundleGroup(BundleGroup bg) {
+ groups.add(bg);
+ }
+
+ /**
+ * Add a bundle item to the bundle. This bundle item should all ready have its
+ * bundle group assigned.
+ */
+
+ public void addBundleItem(BundleItem item) {
+ if (allItems.containsKey(item.getKey())) {
+ duplicates.addElement(item);
+ } else {
+ if (!(groups.contains(item.getParentGroup())))
+ addBundleGroup(item.getParentGroup());
+ item.getParentGroup().addBundleItem(item);
+ allItems.put(item.getKey(), item);
+ removeUntranslatedItem(item.getKey());
+ if (!item.isTranslated())
+ addUntranslatedItem(item);
+ }
+ }
+
+ /**
+ * A method useful in debugging. The string returned displays the encoding
+ * information about the bundle and wether or not it is the base class of
+ * a resource bundle.
+ */
+
+ public String toString() {
+ String retStr = new String();
+ if (language != null && !language.equals("")) retStr = language;
+ if (country != null && !country.equals("")) retStr += ", " + country;
+ if (variant != null && !variant.equals("")) retStr += ", " + variant;
+
+ retStr += " (" + (encoding == null || encoding.equals("") ? "Base Class" : encoding) + ")";
+ return retStr;
+ }
+
+ /**
+ * This method produces a String which is suitable for inclusion in a .properties
+ * style resource bundle. It attaches (in comments) the meta data that RBManager
+ * reads to manage the resource bundle file. This portion of the output should
+ * be included at the beginning of the resource bundle file.
+ */
+
+ public String toOutputString() {
+ String retStr = "# @file " + name + "\n";
+ if (encoding != null) retStr += "# @fileEncoding " + encoding + "\n";
+ if (language != null) retStr += "# @fileLanguage " + language + "\n";
+ if (country != null) retStr += "# @fileCountry " + country + "\n";
+ if (variant != null) retStr += "# @fileVariant " + variant + "\n";
+ if (manager != null) retStr += "# @fileManager " + manager + "\n";
+ if (comment != null) retStr += "# @fileComment " + comment + "\n";
+ return retStr;
+ }
+
+ /**
+ * A helping method for outputting the formatted contents of the bundle to a
+ * print stream. The method first outputs the header information and then outputs
+ * each bundle group's formatted data which includes each bundle item.
+ */
+
+ public void writeContents(PrintStream ps) {
+ ps.println(this.toOutputString());
+ Iterator iter = groups.iterator();
+ while (iter.hasNext()) {
+ ((BundleGroup)iter.next()).writeContents(ps);
+ }
+ }
+
+ /**
+ * A helping method for outputting the formatted contents of the bundle to a
+ * ouput Writer (such as a FileWriter). The method first outputs the header
+ * information and then outputs each bundle group's formatted data which includes
+ * each bundle item.
+ */
+
+ public void writeContents(Writer w) throws IOException {
+ w.write(this.toOutputString() + "\n");
+ Iterator iter = groups.iterator();
+ while (iter.hasNext()) {
+ ((BundleGroup)iter.next()).writeContents(w);
+ }
+ }
+}
\ No newline at end of file
diff --git a/unicodetools/com/ibm/rbm/BundleGroup.java b/unicodetools/com/ibm/rbm/BundleGroup.java
new file mode 100644
index 0000000..a5ccedb
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/BundleGroup.java
@@ -0,0 +1,188 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.Writer;
+import java.util.*;
+
+/**
+ * A class representing a group of BundleItems and the meta data associated with that group
+ *
+ * @author Jared Jackson
+ * @see com.ibm.rbm.RBManager
+ */
+public class BundleGroup {
+    /** Name of the group whose heading is omitted when the contents are written. */
+    private static final String UNGROUPED_NAME = "Ungrouped Items";
+
+    /**
+     * Orders bundle items by NLS key. Objects that are not BundleItems compare as equal (0).
+     */
+    private static final Comparator ITEM_KEY_ORDER = new Comparator() {
+        public int compare(Object o1, Object o2) {
+            if (!(o1 instanceof BundleItem) || !(o2 instanceof BundleItem))
+                return 0;
+            return ((BundleItem)o1).getKey().compareTo(((BundleItem)o2).getKey());
+        }
+    };
+
+    private String name;    // The name of the group
+    private String comment; // A comment describing this group
+    private TreeSet items;  // The NLS items contained in this group, sorted by key
+    private Bundle bundle;  // The parent Bundle object of this group
+
+    /**
+     * Basic data constructor.
+     * Creates a BundleGroup with a parent bundle and a given name.
+     */
+    public BundleGroup(Bundle parent, String name) {
+        bundle = parent;
+        this.name = name;
+        comment = null;
+        items = new TreeSet(ITEM_KEY_ORDER);
+    }
+
+    /**
+     * Two bundle groups are considered equal iff their names are the same.
+     * Groups whose names are both null are also considered equal.
+     */
+    public boolean equals(Object o) {
+        if (!(o instanceof BundleGroup))
+            return false;
+        String otherName = ((BundleGroup)o).getName();
+        return name == null ? otherName == null : name.equals(otherName);
+    }
+
+    /**
+     * Hash code derived from the name so that it agrees with equals.
+     * Note that the value changes when the group is renamed through setName.
+     */
+    public int hashCode() {
+        return name == null ? 0 : name.hashCode();
+    }
+
+    /**
+     * Returns a new Vector holding the items of this group in key order.
+     */
+    public Vector getItemsAsVector() {
+        return new Vector(items);
+    }
+
+    /**
+     * Adds a BundleItem to the group, replacing any item with the same key that is
+     * currently in the group. The item's parent group is set to this group.
+     * This method should, in most cases, only be called from the Bundle class.
+     */
+    public void addBundleItem(BundleItem item) {
+        // TreeSet.add keeps an existing equal element, so drop any old entry first.
+        items.remove(item);
+        item.setParentGroup(this);
+        items.add(item);
+    }
+
+    /**
+     * Remove an item of the given name from the group
+     */
+    public void removeBundleItem(String itemName) {
+        Iterator iter = items.iterator();
+        while (iter.hasNext()) {
+            BundleItem item = (BundleItem)iter.next();
+            if (item.getKey().equals(itemName)) {
+                iter.remove();
+                break;
+            }
+        }
+    }
+
+    /**
+     * Returns the number of items stored in the group
+     */
+    public int getItemCount() {
+        return items.size();
+    }
+
+    /**
+     * Returns a BundleItem from the set of items at a particular index point.
+     * If the index is negative, or greater than or equal to the number of items
+     * in the set, null is returned.
+     */
+    public BundleItem getBundleItem(int index) {
+        if (index < 0 || index >= items.size())
+            return null;
+        Iterator iter = items.iterator();
+        for (int i = 0; i < index; i++)
+            iter.next();
+        return (BundleItem)iter.next();
+    }
+
+    /**
+     * Returns the bundle to which this group belongs
+     */
+    public Bundle getParentBundle() {
+        return bundle;
+    }
+
+    /**
+     * Returns the comment associated with this group
+     */
+    public String getComment() {
+        return comment;
+    }
+
+    /**
+     * Returns the name of the group
+     */
+    public String getName() {
+        return name;
+    }
+
+    /**
+     * Sets the bundle to which this group belongs
+     */
+    protected void setParentBundle(Bundle bundle) {
+        this.bundle = bundle;
+    }
+
+    /**
+     * Sets the comment associated with this group
+     */
+    public void setComment(String comment) {
+        this.comment = comment;
+    }
+
+    /**
+     * Sets the name of the group
+     */
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    /**
+     * The translation to a string returns the name of the group
+     */
+    public String toString() {
+        return name;
+    }
+
+    /**
+     * Returns the output for a group heading.
+     * This will be found in comment lines above the group items
+     */
+    public String toOutputString() {
+        StringBuffer buf = new StringBuffer("\n#\n# @group ").append(name).append("\n#\n");
+        if (comment != null)
+            buf.append("# @groupComment ").append(comment).append("\n");
+        return buf.toString();
+    }
+
+    /**
+     * Writes the output contents to a particular PrintStream.
+     * The output will be suitable for a properly formatted .properties file.
+     * No heading is written for the group named 'Ungrouped Items'.
+     */
+    public void writeContents(PrintStream ps) {
+        // Constant-first comparison so that a null name does not throw
+        if (!UNGROUPED_NAME.equals(name))
+            ps.println(this.toOutputString());
+        Iterator iter = items.iterator();
+        while (iter.hasNext()) {
+            ((BundleItem)iter.next()).writeContents(ps);
+        }
+    }
+
+    /**
+     * Writes the output contents to a particular Writer.
+     * The output will be suitable for a properly formatted .properties file.
+     * No heading is written for the group named 'Ungrouped Items'.
+     */
+    public void writeContents(Writer w) throws IOException {
+        // Constant-first comparison so that a null name does not throw
+        if (!UNGROUPED_NAME.equals(name))
+            w.write(this.toOutputString() + "\n");
+        Iterator iter = items.iterator();
+        while (iter.hasNext()) {
+            ((BundleItem)iter.next()).writeContents(w);
+        }
+    }
+}
\ No newline at end of file
diff --git a/unicodetools/com/ibm/rbm/BundleItem.java b/unicodetools/com/ibm/rbm/BundleItem.java
new file mode 100644
index 0000000..a53bd4e
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/BundleItem.java
@@ -0,0 +1,393 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2002, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ *
+ * $Source: /xsrl/Nsvn/icu/unicodetools/com/ibm/rbm/BundleItem.java,v $
+ * $Date: 2004/07/14 18:29:00 $
+ * $Revision: 1.4 $
+ *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.io.Writer;
+import java.text.SimpleDateFormat;
+import java.text.ParseException;
+import java.util.*;
+
+/**
+ * A class representing a single translation item and all of the meta-data associated with that translation
+ *
+ * @author Jared Jackson - Email: <a href="mailto:jjared@almaden.ibm.com">jjared@almaden.ibm.com</a>
+ * @see com.ibm.rbm.RBManager
+ */
+public class BundleItem {
+    private String name;        // The name of the NLS item key
+    private String value;       // The translation of the key item
+    private String comment;     // A comment about this item
+    private boolean translated; // Has this item been translated?
+    private Date created;       // The date of creation of the item
+    private Date modified;      // The last modification date of the item
+    private String creator;     // The name of the person who created the item
+    private String modifier;    // The name of the person who last modified the item
+    private Hashtable lookups;  // A hashtable of lookups for the item (i.e. ({#}, Meaning) pairs)
+    private BundleGroup group;  // The parent group of the item
+
+    /**
+     * Basic data constructor for a resource bundle item.
+     * @param parent The BundleGroup to which the item belongs. This group will have its own Bundle parent.
+     * @param name The NLS lookup key common across all bundle files in the resource bundle
+     * @param value The translated value of the item appropriate for the encoding of the bundle file to which the item belongs
+     */
+    public BundleItem(BundleGroup parent, String name, String value) {
+        this.name = name;
+        this.value = value;
+        this.group = parent;
+        comment = null;
+        translated = false;
+        created = new Date();  // Defaults to the system's current date
+        modified = new Date(); // Defaults to the system's current date
+        creator = null;
+        modifier = null;
+        lookups = new Hashtable();
+    }
+
+    /**
+     * Returns the BundleGroup to which this item belongs
+     */
+    public BundleGroup getParentGroup() {
+        return group;
+    }
+
+    /**
+     * Returns the date this item was last modified.
+     */
+    public Date getModifiedDate() {
+        return modified;
+    }
+
+    /**
+     * Returns the date the item was first created.
+     */
+    public Date getCreatedDate() {
+        return created;
+    }
+
+    /**
+     * Returns the login name of the user that created the item.
+     */
+    public String getCreator() {
+        return creator;
+    }
+
+    /**
+     * Returns the login name of the user that last modified the item.
+     */
+    public String getModifier() {
+        return modifier;
+    }
+
+    /**
+     * Returns the NLS lookup key for the item.
+     */
+    public String getKey() {
+        return name;
+    }
+
+    /**
+     * Returns the translation value for the item.
+     */
+    public String getTranslation() {
+        return value;
+    }
+
+    /**
+     * Returns a comment associated with the item.
+     */
+    public String getComment() {
+        return comment;
+    }
+
+    /**
+     * Has the item yet been translated, or was it merely derived from a previous
+     * bundle file?
+     */
+    public boolean isTranslated() {
+        return translated;
+    }
+
+    /**
+     * Returns a hashtable of the various lookups associated with the item. Lookups are
+     * context sensitive information stored within the resource item and have their own
+     * meta-data associated with themselves.
+     */
+    public Hashtable getLookups() {
+        return lookups;
+    }
+
+    /**
+     * Sets the translated value of the item. A true mark indicates that the item has
+     * been examined or modified and is ready for use in the encoding specified by the
+     * parent Bundle. When the flag actually changes and the item has a parent bundle,
+     * that bundle's set of untranslated items is updated accordingly.
+     */
+    public void setTranslated(boolean isTranslated) {
+        if (translated == isTranslated)
+            return;
+        translated = isTranslated;
+        BundleGroup parent = this.getParentGroup();
+        if (parent == null || parent.getParentBundle() == null)
+            return;
+        Bundle bundle = parent.getParentBundle();
+        if (isTranslated)
+            bundle.removeUntranslatedItem(this.name);
+        else
+            bundle.addUntranslatedItem(this);
+    }
+
+    /**
+     * Sets the comment associated with this item.
+     */
+    public void setComment(String comment) {
+        this.comment = comment;
+    }
+
+    /**
+     * Given a hashtable of lookups, associates those lookups with this item.
+     */
+    public void setLookups(Hashtable lookups) {
+        this.lookups = lookups;
+    }
+
+    /**
+     * Sets the NLS key associated with this item. Be careful using this method, as
+     * it does not change the lookup value of any other items in the resource bundle.
+     * This must be done at a higher level.
+     */
+    public void setKey(String keyName) {
+        name = keyName;
+    }
+
+    /**
+     * Sets the translation value of the item.
+     */
+    public void setTranslation(String translationValue) {
+        value = translationValue;
+    }
+
+    /**
+     * Sets the parent BundleGroup of the item.
+     */
+    public void setParentGroup(BundleGroup group) {
+        this.group = group;
+    }
+
+    /**
+     * Associates a login name of the creator of the item with the item.
+     */
+    public void setCreator(String name) {
+        creator = name;
+    }
+
+    /**
+     * Associates a login name of the last modifier of the item with the item.
+     */
+    public void setModifier(String name) {
+        modifier = name;
+    }
+
+    /**
+     * Sets the created date of the item given a date formatted string.
+     * The format can be either 'YYYY-MM-DD' (e.g. 2002-02-05) or
+     * the format can be 'YYYYMMDDTHHMMSSZ' (e.g. 20020205T103000Z).
+     * A null string leaves the date unchanged.
+     */
+    public void setCreatedDate(String dateStr) {
+        if (dateStr != null)
+            created = parseDateFromString(dateStr);
+    }
+
+    /**
+     * Sets the created date of the item.
+     */
+    public void setCreatedDate(Date date) {
+        created = date;
+    }
+
+    /**
+     * Sets the last modification date of the item given a date formatted string.
+     * The format can be either 'YYYY-MM-DD' (e.g. 2002-02-05) or
+     * the format can be 'YYYYMMDDTHHMMSSZ' (e.g. 20020205T103000Z).
+     * A null string leaves the date unchanged.
+     */
+    public void setModifiedDate(String dateStr) {
+        if (dateStr != null)
+            modified = parseDateFromString(dateStr);
+    }
+
+    /**
+     * Sets the last modification date of the item.
+     */
+    public void setModifiedDate(Date date) {
+        modified = date;
+    }
+
+    /**
+     * Simply returns the lookup name of the item.
+     */
+    public String toString() {
+        return name;
+    }
+
+    /**
+     * Returns the formatted output of this bundle item as it would be included in a .properties
+     * formatted resource bundle file. This format also contains the meta-data used by RBManager in
+     * the form of parseable comments.
+     */
+    public String toOutputString() {
+        StringBuffer out = new StringBuffer(translated ? "# @translated true" : "# @translated false");
+        if (created != null)
+            out.append(" @created ").append(formatDate(created));
+        if (modified != null)
+            out.append(" @modified ").append(formatDate(modified));
+        if (creator != null)
+            out.append(" @creator ").append(creator);
+        if (modifier != null)
+            out.append(" @modifier ").append(modifier);
+        // lookups may have been replaced by null through setLookups
+        if (lookups != null) {
+            Enumeration keys = lookups.keys();
+            while (keys.hasMoreElements()) {
+                String key = (String)keys.nextElement();
+                out.append("\n# @{").append(key).append("} ").append((String)lookups.get(key));
+            }
+        }
+        if (comment != null)
+            out.append("\n# @comment ").append(comment);
+
+        // Only the value is escaped; the key is written as is.
+        out.append("\n").append(name).append("=").append(saveConvert(value));
+        return out.toString();
+    }
+
+    /**
+     * Writes the formatted contents to a PrintStream.
+     */
+    public void writeContents(PrintStream ps) {
+        ps.println(this.toOutputString());
+    }
+
+    /**
+     * Writes the formatted contents to a writer such as a FileWriter.
+     */
+    public void writeContents(Writer w) throws IOException {
+        w.write(this.toOutputString() + "\n");
+    }
+
+    /**
+     * Formats a date as year-MM-dd using the default calendar; month and day are
+     * zero padded to two digits, the year is written without padding.
+     */
+    private static String formatDate(Date date) {
+        GregorianCalendar cal = new GregorianCalendar();
+        cal.setTime(date);
+        int year = cal.get(Calendar.YEAR);
+        int month = cal.get(Calendar.MONTH) + 1; // Calendar months are zero based
+        int day = cal.get(Calendar.DAY_OF_MONTH);
+        return year + "-" + (month > 9 ? "" : "0") + month + "-" + (day > 9 ? "" : "0") + day;
+    }
+
+    /*
+     * Converts unicodes to encoded \\uxxxx
+     * and writes out any of the characters in specialSaveChars
+     * with a preceding slash. A null string is converted to the empty string.
+     */
+    // Taken from java.util.Properties
+    private String saveConvert(String theString) {
+        if (theString == null)
+            return "";
+        int len = theString.length();
+        StringBuffer outBuffer = new StringBuffer(len * 2);
+
+        for (int x = 0; x < len; x++) {
+            char aChar = theString.charAt(x);
+            switch (aChar) {
+                case '\\':
+                    outBuffer.append('\\').append('\\');
+                    break;
+                case '\t':
+                    outBuffer.append('\\').append('t');
+                    break;
+                case '\n':
+                    outBuffer.append('\\').append('n');
+                    break;
+                case '\r':
+                    outBuffer.append('\\').append('r');
+                    break;
+                case '\f':
+                    outBuffer.append('\\').append('f');
+                    break;
+                default:
+                    // Everything outside printable ASCII is written as \\uxxxx. The bounds
+                    // are hexadecimal: a decimal 20 would let control characters
+                    // 0x14-0x1F through unescaped.
+                    if ((aChar < 0x20) || (aChar > 0x7e)) {
+                        outBuffer.append('\\');
+                        outBuffer.append('u');
+                        outBuffer.append(toHex((aChar >> 12) & 0xF));
+                        outBuffer.append(toHex((aChar >> 8) & 0xF));
+                        outBuffer.append(toHex((aChar >> 4) & 0xF));
+                        outBuffer.append(toHex((aChar >> 0) & 0xF));
+                    }
+                    else {
+                        if (specialSaveChars.indexOf(aChar) != -1)
+                            outBuffer.append('\\');
+                        outBuffer.append(aChar);
+                    }
+            }
+        }
+        return outBuffer.toString();
+    }
+
+    /**
+     * Convert a nibble to a hex character
+     * @param nibble the nibble to convert.
+     */
+    // Taken from java.util.Properties
+    private static char toHex(int nibble) {
+        return hexDigit[(nibble & 0xF)];
+    }
+
+    /** A table of hex digits */
+    // Taken from java.util.Properties
+    private static final char[] hexDigit = {
+        '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'
+    };
+
+    // Taken from java.util.Properties
+    private static final String specialSaveChars = "=: \t\r\n\f#!";
+
+    /**
+     * Parses a date string. A string of exactly ten characters is read as 'yyyy-MM-dd',
+     * anything else as the TMX ISO form yyyyMMdd'T'HHmmss'Z'. If the string cannot be
+     * parsed, the current date is returned instead.
+     */
+    private Date parseDateFromString(String dateStr) {
+        SimpleDateFormat format;
+        if (dateStr.length() == 10)
+            format = new SimpleDateFormat("yyyy-MM-dd"); // Simple format
+        else
+            // NOTE(review): the trailing 'Z' is matched as a literal and no time zone is set
+            // on the format, so the time is read in the default zone - confirm this is intended.
+            format = new SimpleDateFormat("yyyyMMdd'T'HHmmss'Z'"); // TMX ISO format
+        try {
+            return format.parse(dateStr);
+        } catch (ParseException pe) {
+            return new Date();
+        }
+    }
+}
\ No newline at end of file
diff --git a/unicodetools/com/ibm/rbm/Occurance.java b/unicodetools/com/ibm/rbm/Occurance.java
new file mode 100644
index 0000000..b9bcbda
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/Occurance.java
@@ -0,0 +1,63 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+/**
+ * This is a class used by the RBReporter to track occurances of a resource
+ * key found while scanning a text code file. It is used mainly to produce error
+ * messages with helpful context information.
+ *
+ * @author Jared Jackson
+ * @see com.ibm.rbm.RBReporter
+ */
+public class Occurance {
+ private String file_name;
+ private String file_path;
+ private int line_number;
+
+ /**
+ * Basic data constructor.
+ */
+
+ Occurance (String file_name, String file_path, int line_number) {
+ this.file_name = file_name;
+ this.file_path = file_path;
+ this.line_number = line_number;
+ }
+
+ /**
+ * Returns the associated file name of the occurance
+ */
+
+ public String getFileName() {
+ return file_name;
+ }
+
+ /**
+ * Returns the associated file path of the occurance
+ */
+
+ public String getFilePath() {
+ return file_path;
+ }
+
+ /**
+ * Returns the line number of the occurance.
+ */
+
+ public int getLineNumber() {
+ return line_number;
+ }
+
+ /**
+ * A representation of the occurance of the form 'Occurance: _file_path_ (_line_number_)'
+ */
+
+ public String toString() {
+ return "Occurance: " + file_path + " (" + line_number + ")";
+ }
+}
\ No newline at end of file
diff --git a/unicodetools/com/ibm/rbm/Preferences.java b/unicodetools/com/ibm/rbm/Preferences.java
new file mode 100644
index 0000000..3d94b8a
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/Preferences.java
@@ -0,0 +1,181 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+import java.util.*;
+import java.io.*;
+
+/**
+ * This class defines the methods used by RBManager to access, set, and store
+ * individual user preferences for the application. All of the public methods defined
+ * in this class are static, and so the class need not be instantiated.
+ *
+ * @author Jared Jackson
+ * @see com.ibm.rbm.RBManager
+ */
+public class Preferences {
+ // Default values
+ private static final int NUM_RECENT_FILES = 4;
+ private static final String EMPTY_STRING = "";
+ private static Properties prop;
+
+ /**
+ * Retrieve a preference by its key name
+ * @param name The name of the key associated with one preference
+ * @return The value of the preference sought
+ */
+
+ public static String getPreference(String name) {
+ if (prop == null) init();
+ Object o = prop.get(name);
+ if (o == null || !(o instanceof String)) return EMPTY_STRING;
+ return (String)o;
+ }
+
+ /**
+ * Sets a preference by key name and value. If the key name all ready exists, that
+ * preference is overwritten without warning.
+ * @param name The name of the key associated with the preference
+ * @param value The value of the preference to be set and later retrieved. If this value is null, the property of this name is erased.
+ */
+
+ public static void setPreference(String name, String value) {
+ if (prop == null) init();
+ if (value == null) {
+ // In this case, we will remove the property
+ prop.remove(name);
+ }
+ prop.put(name, value);
+ }
+
+ /**
+ * Writes the results of the buffered preferences to file. There is no option for
+ * where this file is saved on the file system.
+ */
+
+ public static void savePreferences() throws IOException {
+ if (prop == null) init();
+ FileOutputStream fos = new FileOutputStream("preferences.properties");
+ prop.store(fos, "RBManager Preferences");
+ fos.flush();
+ fos.close();
+ }
+
+ /**
+ * Given the name of a resource bundle and the file path location of the base
+ * document for that resource bundle, this method will insert that file into
+ * a list of recent files. Currently the past 4 resource bundles visited will
+ * be displayed. This method also sorts the prefences so that the most recently
+ * added will be the first returned, even if that file had all ready existed
+ * in the preferences when it was added.
+ * @param name The name of this file as it will be displayed to the user
+ * @param location The file path to this file (should be absolute).
+ */
+
+ public static void addRecentFilePreference(String name, String location) {
+ Vector existingNames = new Vector();
+ Vector existingLocations = new Vector();
+ for (int i=0; i < NUM_RECENT_FILES; i++) {
+ String oldName = getPreference("recentfileid" + String.valueOf(i));
+ String oldLocation = getPreference("recentfileloc" + String.valueOf(i));
+ if (oldName.equals(EMPTY_STRING) || oldLocation.equals(EMPTY_STRING)) break;
+ existingNames.addElement(oldName);
+ existingLocations.addElement(oldLocation);
+ }
+ // Check to see if the file is all ready in there
+ int swap_start = 0;
+ int old_size = existingLocations.size();
+ for (int i=0; i <= old_size; i++) {
+ if (i == existingLocations.size()) {
+ // No match was found, pull all the elements down one
+ swap_start = i;
+ if (swap_start >= NUM_RECENT_FILES) swap_start = NUM_RECENT_FILES-1;
+ else {
+ // Extend the length of the vectors
+ existingNames.addElement(EMPTY_STRING);
+ existingLocations.addElement(EMPTY_STRING);
+ }
+ } else {
+ String oldLocation = (String)existingLocations.elementAt(i);
+ if (oldLocation.equals(location)) {
+ // We found a match, pull this one to the front
+ swap_start = i;
+ break;
+ }
+ }
+ }
+
+ // Move the files down the line as appropriate
+ for (int i=swap_start; i > 0; i--) {
+ existingLocations.setElementAt(existingLocations.elementAt(i-1),i);
+ existingNames.setElementAt(existingNames.elementAt(i-1),i);
+ }
+ existingLocations.setElementAt(location, 0);
+ existingNames.setElementAt(name, 0);
+
+ // Set the properties
+ for (int i=0; i < existingLocations.size(); i++) {
+ setPreference("recentfileid" + String.valueOf(i), (String)existingNames.elementAt(i));
+ setPreference("recentfileloc" + String.valueOf(i), (String)existingLocations.elementAt(i));
+ }
+ for (int i=existingLocations.size(); i < NUM_RECENT_FILES; i++) {
+ setPreference("recentfileid" + String.valueOf(i), EMPTY_STRING);
+ setPreference("recentfileloc" + String.valueOf(i), EMPTY_STRING);
+ }
+ try {
+ savePreferences();
+ } catch (IOException ioe) {} // Ignore, its not critical
+ }
+
+ /**
+ * Returns a list of the names and locations of the various recently used files.
+ * @return A Vector of Strings which is twice in length the number of files known about. The vector contains name 1 then location 1, then name 2 ...
+ */
+
+ public static Vector getRecentFilesPreferences() {
+ if (prop == null) init();
+ Vector existing = new Vector();
+ for (int i=0; i < NUM_RECENT_FILES; i++) {
+ String name = getPreference("recentfileid" + String.valueOf(i));
+ String location = getPreference("recentfileloc" + String.valueOf(i));
+ if (name.equals(EMPTY_STRING) || location.equals(EMPTY_STRING)) break;
+ existing.addElement(name);
+ existing.addElement(location);
+ }
+ return existing;
+ }
+
+ private static void init() {
+ Properties defaults = new Properties();
+ // This values are needed and are specified by default
+ // If they exist in the file, they will be overwritten
+ defaults.put("username", Resources.getTranslation("unknown_user"));
+ defaults.put("locale", "en");
+ defaults.put("lookandfeel", "");
+
+ prop = new Properties(defaults);
+ try {
+ FileInputStream fis = new FileInputStream("preferences.properties");
+ prop.load(fis);
+ } catch (IOException ioe) {
+ System.err.println("Error reading properties");
+ ioe.printStackTrace(System.err);
+ }
+ try {
+ savePreferences();
+ } catch (IOException ioe) {
+ System.err.println("Error saving preferences " + ioe.getMessage());
+ }
+ }
+
+ /*
+ public static void main(String args[]) {
+ // Test
+ init();
+ }
+ */
+}
diff --git a/unicodetools/com/ibm/rbm/RBExporter.java b/unicodetools/com/ibm/rbm/RBExporter.java
new file mode 100644
index 0000000..7da5804
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/RBExporter.java
@@ -0,0 +1,24 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+import java.io.IOException;
+
+import javax.swing.*;
+
+/**
+ * This is the super class for all exporter plug-in classes. As of yet, there
+ * is little contained in this class: it declares the single export entry point
+ * and a file chooser for use by the subclasses.
+ *
+ * @author Jared Jackson
+ * @see com.ibm.rbm.RBManager
+ */
+public abstract class RBExporter {
+ /**
+ * File chooser for use by exporter subclasses. The field is static and declared
+ * here, so one instance is shared by every subclass; it is null until a subclass
+ * assigns it.
+ * NOTE(review): the exporters in this package create the chooser and install their
+ * own file filter only while this field is still null, so the first exporter that
+ * is constructed appears to decide the filter for all of them - confirm that this
+ * is intended.
+ */
+ protected static JFileChooser chooser;
+
+ /**
+ * Exports the resource bundles held by the given manager.
+ *
+ * @param rbm the manager to export from; the implementations in this package
+ * return without doing anything when it is null
+ * @throws IOException if the export could not be written
+ */
+ public abstract void export(RBManager rbm) throws IOException;
+}
\ No newline at end of file
diff --git a/unicodetools/com/ibm/rbm/RBICUExporter.java b/unicodetools/com/ibm/rbm/RBICUExporter.java
new file mode 100644
index 0000000..babc11e
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/RBICUExporter.java
@@ -0,0 +1,191 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+import java.io.*;
+import javax.swing.*;
+import java.util.*;
+
+/**
+ * This class provides a plug-in exporter utility for RBManager that outputs ICU
+ * resource bundle text files, one UTF-8 encoded <code>.txt</code> file per bundle,
+ * according to the file structure of Resource Bundles. The meta-data of groups and
+ * items is written only in the form of comments.
+ *
+ * @author George Rhoten
+ * @see com.ibm.rbm.RBManager
+ */
+public class RBICUExporter extends RBExporter {
+    /** Do characters beyond \\u007f need \\u escape notation? */
+    private boolean escapeNonAscii = false;
+
+    /** Write the meta data for each resource? */
+    private boolean writeMetaData = true;
+
+    /** Write the groups as keys? */
+    private boolean writeGroupsAsKeys = false;
+
+    /**
+     * Creates the exporter and, if no file chooser exists yet, creates the shared
+     * chooser with a filter that accepts directories and files whose name starts
+     * with "root.".
+     * NOTE(review): the chooser is a static field of RBExporter, so when another
+     * exporter created it first, that exporter's filter stays in place - confirm.
+     */
+    public RBICUExporter() {
+        super();
+
+        // Initialize the file chooser if necessary
+        if (chooser == null) {
+            chooser = new JFileChooser();
+            chooser.setFileFilter(new javax.swing.filechooser.FileFilter(){
+                public String getDescription() {
+                    return "root ICU File";
+                }
+                public boolean accept(File f) {
+                    if (f.isDirectory()) return true;
+                    return (f.getName().startsWith("root."));
+                }
+            });
+        } // end if
+    }
+
+    /**
+     * Asks the user for a target file through a save dialog and writes one file
+     * named <code>base[_encoding].txt</code> per bundle into the directory of the
+     * selected file, where <code>base</code> is the selected file name (without a
+     * trailing ".properties"). Returns silently if rbm is null or the dialog is
+     * not approved.
+     *
+     * @param rbm the manager whose bundles are exported
+     * @throws IOException if a file cannot be created or written, or if a key or
+     *         group name contains characters that escapeString() rejects
+     */
+    public void export(RBManager rbm) throws IOException {
+        if (rbm == null) return;
+        // Open the Save Dialog
+        int ret_val = chooser.showSaveDialog(null);
+        if (ret_val != JFileChooser.APPROVE_OPTION) {
+            return;
+        }
+        // Retrieve basic file information
+        File file = chooser.getSelectedFile();       // The file(s) we will be working with
+        // The directory we will be writing to. May be null for a file without a parent;
+        // File(File, String) accepts a null parent, unlike new File(file.getParent()).
+        File directory = file.getParentFile();
+        String base_name = file.getName();           // The base name of the files we will write
+        if (base_name == null || base_name.equals("")) {
+            base_name = rbm.getBaseClass();
+        }
+        if (base_name.toLowerCase().endsWith(".properties")) {
+            base_name = base_name.substring(0,base_name.length()-11);
+        }
+
+        Vector bundle_v = rbm.getBundles();
+        for (int i=0; i < bundle_v.size(); i++) {
+            Bundle bundle = (Bundle)bundle_v.elementAt(i);
+            String base_enc = base_name;
+            if (bundle.encoding != null && !bundle.encoding.equals("")) {
+                base_enc = base_enc + "_" + bundle.encoding;
+            }
+            String file_name = base_enc + ".txt";
+            String header = "\ufeff// Resource Bundle: " + file_name + " - File automatically generated by RBManager at " + (new Date());
+
+            OutputStream fos = new FileOutputStream(new File(directory, file_name));
+            try {
+                PrintWriter resOut = new PrintWriter(new OutputStreamWriter(fos, "UTF-8"));
+
+                resOut.println(header);
+                resOut.println(base_enc + " { ");
+                Vector group_v = bundle.getGroupsAsVector();
+                for (int j=0; j < group_v.size(); j++) {
+                    writeGroup(resOut, bundle, (BundleGroup)group_v.elementAt(j));
+                } // end for - j
+                resOut.println("}");
+
+                // PrintWriter swallows write errors; checkError() flushes and reports them
+                if (resOut.checkError()) {
+                    throw new IOException("Error writing " + file_name);
+                }
+            } finally {
+                // Release the file even when a key could not be escaped
+                fos.close();
+            }
+        } // end for - i
+    }
+
+    /**
+     * Writes the items of one group, preceded by the group's meta-data comments.
+     * A group without items produces no output at all.
+     *
+     * @param resOut the writer for the current bundle file
+     * @param bundle the bundle that owns the group; used to recognize the ungrouped group
+     * @param group the group to write
+     * @throws IOException if a group name or item key cannot be escaped
+     */
+    private void writeGroup(PrintWriter resOut, Bundle bundle, BundleGroup group) throws IOException {
+        Vector itemVect = group.getItemsAsVector();
+        int itemVectSize = itemVect.size();
+        if (itemVectSize <= 0) {
+            return;
+        }
+        if (writeMetaData) {
+            String groupComment = group.getComment();
+            if (groupComment != null && !groupComment.equals("")) {
+                resOut.println(" // @groupComment " + groupComment);
+            }
+        }
+
+        // The ungrouped group never gets a name written
+        boolean writeGroupName = !bundle.getUngroupedGroup().getName().equals(group.getName());
+        if (writeGroupName) {
+            if (writeGroupsAsKeys) {
+                resOut.println(" " + escapeString(group.getName(), true) + " { ");
+            }
+            else if (writeMetaData) {
+                resOut.println(" // @group " + escapeString(group.getName(), true));
+            }
+        }
+        for (int k=0; k < itemVectSize; k++) {
+            BundleItem item = (BundleItem)itemVect.elementAt(k);
+
+            if (writeMetaData) {
+                resOut.print(" //");
+                resOut.print(" @translated " + item.isTranslated());
+                resOut.print(" @created " + item.getCreatedDate());
+                resOut.print(" @modified " + item.getModifiedDate());
+                resOut.print(" @creator " + item.getCreator());
+                resOut.println(" @modifier " + item.getModifier());
+                String itemComment = item.getComment();
+                if (itemComment != null && !itemComment.equals("")) {
+                    resOut.println(" // @comment " + itemComment);
+                }
+            }
+
+            resOut.println(" " + escapeString(item.getKey(), true)
+                           + " { " + escapeString(item.getTranslation(), false) + " }");
+        } // end for - k
+        if (writeGroupName && writeGroupsAsKeys) {
+            resOut.println(" }");
+        }
+    }
+
+    /**
+     * Escape a string according to how the ICU tool "genrb" handles strings.
+     * Double quotes are backslash-escaped. Values are always quoted; keys are quoted
+     * only when they contain a space, a double quote or a brace. When escapeNonAscii
+     * is set, characters outside the printable ASCII range (other than the space) are
+     * written as \\uXXXX, or as \\UXXXXXXXX for a valid surrogate pair.
+     *
+     * @param str The string to escape
+     * @param isKey If this is a key, then quotes are optional.
+     * @return A string that can be used in an ICU resource bundle.
+     * @throws IOException if a key contains a character outside the printable ASCII
+     *         range other than the space
+     */
+    protected String escapeString(String str, boolean isKey) throws IOException {
+        StringBuffer strBuf = new StringBuffer();
+        int len = str.length();
+        boolean quoteRequired = !isKey;
+        for (int idx = 0; idx < len; idx++) {
+            // Must stay a char: StringBuffer.append(int) would append the decimal
+            // character code instead of the character itself.
+            char ch = str.charAt(idx);
+            if (ch <= ' ' || '~' < ch) {
+                if (isKey && ch != ' ') {
+                    throw new IOException(str + " needs to use invariant characters for the key.");
+                } else if (escapeNonAscii && ch != ' ') {
+                    String zeros;
+                    String hexNum;
+                    boolean leadSurrogate = 0xd800 <= ch && ch <= 0xdbff;
+                    if (leadSurrogate && idx + 1 < len
+                            && 0xdc00 <= str.charAt(idx + 1) && str.charAt(idx + 1) <= 0xdfff) {
+                        // Valid surrogate pair: consume the trail unit as well
+                        char ch2 = str.charAt(++idx);
+                        int chSurrogate = (((ch)<<10)+(ch2)-((0xd800<<10)+0xdc00-0x10000));
+
+                        zeros = "00000000";
+                        hexNum = Integer.toHexString(chSurrogate);
+                        strBuf.append("\\U");
+                    } else {
+                        // BMP character, or an unpaired surrogate written as it is
+                        zeros = "0000";
+                        hexNum = Integer.toHexString(ch);
+                        strBuf.append("\\u");
+                    }
+                    strBuf.append(zeros.substring(hexNum.length()) + hexNum.toUpperCase());
+                } else {
+                    quoteRequired = true;
+                    strBuf.append(ch);
+                }
+            } else if (ch == '\"') {
+                quoteRequired = true;
+                strBuf.append("\\\"");
+            } else {
+                if (ch == '{' || ch == '}') {
+                    quoteRequired = true;
+                }
+                strBuf.append(ch);
+            }
+        }
+        if (quoteRequired) {
+            strBuf.insert(0, '\"');
+            strBuf.append('\"');
+        }
+        return strBuf.toString();
+    }
+}
\ No newline at end of file
diff --git a/unicodetools/com/ibm/rbm/RBImporter.java b/unicodetools/com/ibm/rbm/RBImporter.java
new file mode 100644
index 0000000..b9f8e1f
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/RBImporter.java
@@ -0,0 +1,452 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+import java.io.*;
+import javax.swing.*;
+
+import com.ibm.rbm.gui.RBManagerGUI;
+
+import java.util.*;
+import java.awt.*;
+import java.awt.event.*;
+
+/**
+ * <P>This is the super class for all importer plug-in classes.</P>
+ * <P>
+ * In terms of general functionality of this class or its children classes, the following steps should happen in order:
+ * <OL>
+ * <LI>A Dialog is shown from which the user may select options about the import, including the file from which to import.</LI>
+ * <LI>The 'Import' button is pressed, closing the options dialog and opening a progress bar dialog box.</LI>
+ * <LI>The class should resolve all conflicts with locale encodings existing in the import files, but not in the active resource bundle.</LI>
+ * <LI>The class should parse resources one at a time and use the importResource() method to insert them into the resource bundle.</LI>
+ * <LI>The class should report when all resources have been read and the import is complete.</LI>
+ * </OL>
+ * </P>
+ *
+ * @author Jared Jackson
+ * @see com.ibm.rbm.RBManager
+ */
+public class RBImporter extends JDialog {
+    private final static int FILE_OPTION_POPULATE = 0;      // Create a new locale file populated from base file
+    private final static int FILE_OPTION_EMPTY = 1;         // Create a new empty locale file
+    private final static int FILE_OPTION_IGNORE = 2;        // Ignore all resources from this encoding
+    private final static int FILE_OPTION_PROMPT = 3;        // Prompt for each conflict
+    private final static int RESOURCE_OPTION_OVERWRITE = 0; // Use the value from the source import file
+    private final static int RESOURCE_OPTION_IGNORE = 1;    // Ignore the import and use existing value
+    private final static int RESOURCE_OPTION_PROMPT = 2;    // Prompt for each conflict
+
+    /** File chooser for the import source; replaced by init() each time an importer is created. */
+    protected static JFileChooser chooser;
+    protected int num_conflicts;
+    protected int num_extra_files;
+    protected String title;
+    protected RBManager rbm;
+    protected RBManagerGUI gui;
+    /** True once the user has approved a file in chooseFile(). */
+    protected boolean pathSet = false;
+
+    // Visual Components
+    JRadioButton resourceOverwriteRadio = new JRadioButton(Resources.getTranslation("import_resource_conflict_overwrite"), false);
+    JRadioButton resourceIgnoreRadio = new JRadioButton(Resources.getTranslation("import_resource_conflict_ignore"), false);
+    JRadioButton resourcePromptRadio = new JRadioButton(Resources.getTranslation("import_conflict_prompt"), true);
+    JRadioButton fileGeneratePopulateRadio = new JRadioButton(Resources.getTranslation("import_file_conflict_generate_populate"), false);
+    JRadioButton fileGenerateEmptyRadio = new JRadioButton(Resources.getTranslation("import_file_conflict_generate_empty"), false);
+    JRadioButton fileIgnoreRadio = new JRadioButton(Resources.getTranslation("import_file_conflict_ignore"), false);
+    JRadioButton filePromptRadio = new JRadioButton(Resources.getTranslation("import_conflict_prompt"), true);
+
+    JCheckBox markTranslatedCheck = new JCheckBox(Resources.getTranslation("import_default_translated"), true);
+    JCheckBox createGroupsCheck = new JCheckBox(Resources.getTranslation("import_default_group_creation"), true);
+    JComboBox groupComboBox = new JComboBox();
+
+    JLabel sourceLabel;
+
+    // Set by showProgressBar(); null until then
+    JDialog progressBarDialog;
+    JProgressBar progressBar;
+
+    /**
+     * Constructor. Builds the modal options dialog and, through init(), shows it.
+     * @param title The title that appears in the Dialog box
+     * @param rbm An RBManager instance
+     * @param gui The RBManager GUI instance associated with the RBManager instance
+     */
+    public RBImporter(String title, RBManager rbm, RBManagerGUI gui) {
+        super(new Frame(), title, true);
+        this.title = title;
+        this.rbm = rbm;
+        this.gui = gui;
+        init();
+    }
+
+    /**
+     * Creates a fresh file chooser, lets the subclass configure it, resets the
+     * counters, builds the components and makes the dialog visible.
+     */
+    protected void init() {
+        chooser = new JFileChooser();
+        setupFileChooser();
+        num_conflicts = 0;
+        num_extra_files = 0;
+        initComponents();
+        setVisible(true);
+    }
+
+    /**
+     * Hook for subclasses to configure the file chooser (e.g. its file filter).
+     * Does nothing here.
+     */
+    protected void setupFileChooser() {
+        // To be overridden
+    }
+
+    /**
+     * Starts the import; called when the 'Import' button is pressed. To be overridden.
+     * This base implementation only verifies that a source file has been chosen.
+     *
+     * @throws IOException if no file has been chosen yet
+     */
+    protected void beginImport() throws IOException {
+        // To be overridden
+        if (!pathSet)
+            throw new IOException("Path not set yet");
+    }
+
+    /**
+     * Shows the open dialog; if the user approves a file, displays its absolute
+     * path in the source label and marks the path as set.
+     */
+    protected void chooseFile() {
+        int result = chooser.showOpenDialog(this);
+        if (result == JFileChooser.APPROVE_OPTION) {
+            File f = chooser.getSelectedFile();
+            sourceLabel.setText(Resources.getTranslation("import_source_file",f.getAbsolutePath()));
+            pathSet = true;
+        }
+    }
+
+    /**
+     * @return the file currently selected in the file chooser
+     */
+    protected File getChosenFile() {
+        return chooser.getSelectedFile();
+    }
+
+    /**
+     * A super class method intended for use of nearly all subclass importers, once a resource
+     * is found by those subclasses. This method is called in order to create the new resource
+     * and handle the various conflict errors that may result as a part of that import.
+     * The import is silently skipped when the encoding is null, when no bundle exists for
+     * the encoding, or when no group can be determined for the item.
+     *
+     * @param item the resource to insert; its parent group is set by this method
+     * @param encoding the locale encoding of the bundle that receives the item
+     * @param group_name the name of the target group, or null to use the default group
+     */
+    protected void importResource(BundleItem item, String encoding, String group_name) {
+        Bundle bundle = null;
+        BundleGroup group = null;
+        BundleGroup backup_group = null;
+
+        if (group_name == null)
+            group_name = getDefaultGroup();
+        if (encoding == null)
+            return;
+        // Get the bundle to which we will be adding this resource
+        bundle = rbm.getBundle(encoding);
+        // Skip this import if the bundle is non-existent (Should have been resolved if wanted)
+        if (bundle == null)
+            return;
+        // Find the group in the bundle, Ungrouped if non-existent
+        // NOTE(review): the ungrouped group is recognized by the literal name below, while
+        // initComponents() uses getUngroupedGroup() - confirm the two always agree.
+        Vector gv = bundle.getGroupsAsVector();
+        for (int i=0; i < gv.size(); i++) {
+            BundleGroup tempg = (BundleGroup)gv.elementAt(i);
+            if (i==0) backup_group = tempg;
+            if (tempg.getName().equals("Ungrouped Items")) backup_group = tempg;
+            else if (tempg.getName().equals(group_name)) {
+                group = tempg;
+                break;
+            }
+        }
+        if (group == null) {
+            if (getDefaultGroupCreation()) {
+                // Create a new group by this name
+                bundle.addBundleGroup(group_name, "");
+                gv = bundle.getGroupsAsVector();
+                for (int i=0; i < gv.size(); i++) {
+                    BundleGroup tempg = (BundleGroup)gv.elementAt(i);
+                    if (tempg.getName().equals(group_name)) {
+                        group = tempg;
+                        break;
+                    }
+                }
+            } else {
+                // Use the backup_group
+                group = backup_group;
+            }
+        }
+        // If all group identification efforts fail, we fail
+        if (group == null)
+            return;
+        item.setParentGroup(group);
+        // Check for and resolve conflicts
+        if (bundle.allItems.containsKey(item.getKey())) {
+            resolveResource(bundle,item);
+            RBManagerGUI.debugMsg("Resolve conflict");
+        } else {
+            // Insert the resource
+            bundle.addBundleItem(item);
+        }
+    }
+
+    /**
+     * This method should be called when trying to import an item whose key already exists
+     * within the bundle. Depending on the selected resource conflict option the existing item
+     * is kept, replaced, or the user is asked; cancelling the prompt keeps the existing item.
+     *
+     * @param bundle the bundle that already contains the key
+     * @param item the imported item carrying the conflicting key
+     */
+    protected void resolveResource(Bundle bundle, BundleItem item) {
+        int option = this.getResourceConflictOption();
+        if (option == RESOURCE_OPTION_IGNORE)
+            return;
+        else if (option == RESOURCE_OPTION_OVERWRITE) {
+            bundle.removeItem(item.getKey());
+            bundle.addBundleItem(item);
+        } else if (option == RESOURCE_OPTION_PROMPT) {
+            BundleItem original = (BundleItem)bundle.allItems.get(item.getKey());
+            if (original == null)
+                return;
+            String trans = original.getTranslation();
+            String options[] = { Resources.getTranslation("import_resource_conflict_overwrite"),
+                                 Resources.getTranslation("import_resource_conflict_ignore")};
+            String insert[] = {item.getKey(), (bundle.encoding.equals("") ? "(Base Class)" : bundle.encoding)};
+            String result = (String)JOptionPane.showInputDialog(this, Resources.getTranslation("import_resource_conflict_choose", insert) +
+                "\n" + Resources.getTranslation("import_resource_conflict_choose_source", item.getTranslation()) +
+                "\n" + Resources.getTranslation("import_resource_conflict_choose_target", trans),
+                Resources.getTranslation("import_file_conflicts"), JOptionPane.QUESTION_MESSAGE,
+                null, options, options[0]);
+            if (result == null)
+                return;
+            if (result.equals(Resources.getTranslation("import_resource_conflict_overwrite"))) {
+                bundle.removeItem(item.getKey());
+                bundle.addBundleItem(item);
+            } else if (result.equals(Resources.getTranslation("import_resource_conflict_ignore")))
+                return;
+        }
+    }
+
+    /**
+     * Given a vector of strings containing locale encodings (e.g. {"en", "en_us", "de"}), attempts
+     * to resolve those conflicts according to the preferences selected by the user. Encodings that
+     * are null, empty or already present in the resource bundle are skipped. The display tree of
+     * the GUI is updated afterwards.
+     *
+     * @param v a Vector of String locale encodings found in the import source
+     */
+    protected void resolveEncodings(Vector v) {
+        for (int i=0; i < v.size(); i++) {
+            String encoding = (String)v.elementAt(i);
+            if (encoding == null || encoding.equals("") || rbm.hasResource(encoding)) {
+                continue;
+            }
+
+            // We need to resolve this conflict
+            int option = this.getFileConflictOption();
+            if (option == FILE_OPTION_IGNORE) continue;
+            else if (option == FILE_OPTION_POPULATE) {
+                rbm.createResource(null, null, null, encoding, null, null, null, true);
+            } else if (option == FILE_OPTION_EMPTY) {
+                // Same final argument as the "generate empty" answer of the prompt below
+                rbm.createResource(null, null, null, encoding, null, null, null, false);
+            } else if (option == FILE_OPTION_PROMPT) {
+                String options[] = { Resources.getTranslation("import_file_conflict_generate_populate"),
+                                     Resources.getTranslation("import_file_conflict_generate_empty"),
+                                     Resources.getTranslation("import_file_conflict_ignore")};
+
+                String result = (String)JOptionPane.showInputDialog(this, Resources.getTranslation("import_file_conflict_choose", encoding),
+                    Resources.getTranslation("import_file_conflicts"), JOptionPane.QUESTION_MESSAGE,
+                    null, options, options[0]);
+                if (result == null) continue;
+                if (result.equals(Resources.getTranslation("import_file_conflict_ignore"))) continue;
+                else if (result.equals(Resources.getTranslation("import_file_conflict_generate_populate"))) {
+                    rbm.createResource(null, null, null, encoding, null, null, null, true);
+                } else if (result.equals(Resources.getTranslation("import_file_conflict_generate_empty"))) {
+                    rbm.createResource(null, null, null, encoding, null, null, null, false);
+                }
+            }
+        }
+        gui.updateDisplayTree();
+    }
+
+    // Returns the FILE_OPTION_* constant describing the user's selection for resolving
+    // missing file locale conflicts; FILE_OPTION_PROMPT if nothing is selected
+    private int getFileConflictOption() {
+        if (fileGeneratePopulateRadio.isSelected()) return FILE_OPTION_POPULATE;
+        if (fileGenerateEmptyRadio.isSelected()) return FILE_OPTION_EMPTY;
+        if (fileIgnoreRadio.isSelected()) return FILE_OPTION_IGNORE;
+        if (filePromptRadio.isSelected()) return FILE_OPTION_PROMPT;
+        return FILE_OPTION_PROMPT;
+    }
+
+    // Returns the RESOURCE_OPTION_* constant describing the user's selection for duplicate
+    // resource key conflicts; RESOURCE_OPTION_PROMPT if nothing is selected
+    private int getResourceConflictOption() {
+        if (resourceOverwriteRadio.isSelected()) return RESOURCE_OPTION_OVERWRITE;
+        if (resourceIgnoreRadio.isSelected()) return RESOURCE_OPTION_IGNORE;
+        if (resourcePromptRadio.isSelected()) return RESOURCE_OPTION_PROMPT;
+        return RESOURCE_OPTION_PROMPT;
+    }
+
+    // Returns the group name for use when no group name is specified:
+    // the string form of the item selected in the group combo box
+    protected String getDefaultGroup() {
+        return groupComboBox.getSelectedItem().toString();
+    }
+
+    // Returns the default translation value (state of the 'mark translated' check box)
+    protected boolean getDefaultTranslated() {
+        return markTranslatedCheck.isSelected();
+    }
+
+    // Returns whether or not a group whose name does not exist in the active bundle is created
+    protected boolean getDefaultGroupCreation() {
+        return createGroupsCheck.isSelected();
+    }
+
+    /**
+     * Closes the options dialog and opens a non-modal progress dialog.
+     *
+     * @param steps the maximum value of the progress bar, i.e. the number of
+     *              incrementProgressBar() calls after which the dialog hides itself
+     */
+    protected void showProgressBar(int steps) {
+        thisWindowClosing();
+        // Assign the fields rather than locals, otherwise incrementProgressBar() and
+        // hideProgressBar() never see the dialog and silently do nothing
+        progressBarDialog = new JDialog(this, Resources.getTranslation("dialog_title_import_progress"), false);
+        progressBar = new JProgressBar(0, steps);
+        progressBar.setValue(0);
+        progressBarDialog.getContentPane().add(progressBar);
+        progressBarDialog.pack();
+        progressBarDialog.setVisible(true);
+    }
+
+    /**
+     * Advances the progress bar by one step and hides the progress dialog once the
+     * maximum is reached. Does nothing if no progress bar has been shown.
+     */
+    protected void incrementProgressBar() {
+        if (progressBar == null) return;
+        progressBar.setValue(progressBar.getValue()+1);
+        if (progressBar.getValue() == progressBar.getMaximum()) hideProgressBar();
+    }
+
+    /**
+     * Hides the progress dialog, if there is one.
+     */
+    protected void hideProgressBar() {
+        if (progressBarDialog != null) progressBarDialog.setVisible(false);
+    }
+
+    /**
+     * Initialize the visual components for selecting an import file and setting the appropriate
+     * options
+     */
+    protected void initComponents() {
+        // Create Components
+        JLabel titleLabel = new JLabel(title);
+        sourceLabel = new JLabel(Resources.getTranslation("import_source_file","--"));
+        JLabel insertGroupLabel = new JLabel(Resources.getTranslation("import_insert_group"));
+
+        JButton fileChooseButton = new JButton(Resources.getTranslation("button_choose"));
+        JButton cancelButton = new JButton(Resources.getTranslation("button_cancel"));
+        JButton importButton = new JButton(Resources.getTranslation("button_import"));
+
+        ButtonGroup resourceGroup = new ButtonGroup();
+        ButtonGroup fileGroup = new ButtonGroup();
+
+        JPanel topPanel = new JPanel(new BorderLayout());
+        JPanel midPanel = new JPanel(new BorderLayout());
+        JPanel botPanel = new JPanel(new FlowLayout(FlowLayout.RIGHT));
+
+        JPanel topInnerPanel = new JPanel(new BorderLayout());
+
+        Box midBox = new Box(BoxLayout.Y_AXIS);
+
+        JPanel resourcePanel = new JPanel(new FlowLayout(FlowLayout.LEFT));
+        JPanel filePanel = new JPanel(new FlowLayout(FlowLayout.LEFT));
+        JPanel defaultPanel = new JPanel(new FlowLayout(FlowLayout.LEFT));
+        JPanel defaultPanel2 = new JPanel(new BorderLayout());
+
+        Box resourceBox = new Box(BoxLayout.Y_AXIS);
+        Box fileBox = new Box(BoxLayout.Y_AXIS);
+        Box groupBox = new Box(BoxLayout.X_AXIS);
+
+        // Setup title
+        titleLabel.setFont(new Font("Serif",Font.BOLD,16));
+
+        // Setup panels
+        midPanel.setBorder(BorderFactory.createTitledBorder(Resources.getTranslation("import_options")));
+        resourcePanel.setBorder(BorderFactory.createTitledBorder(Resources.getTranslation("import_resource_conflicts")));
+        filePanel.setBorder(BorderFactory.createTitledBorder(Resources.getTranslation("import_file_conflicts")));
+        defaultPanel.setBorder(BorderFactory.createTitledBorder(Resources.getTranslation("import_default_values")));
+
+        // Arrange button groups
+        fileGroup.add(fileGeneratePopulateRadio);
+        fileGroup.add(fileGenerateEmptyRadio);
+        fileGroup.add(fileIgnoreRadio);
+        fileGroup.add(filePromptRadio);
+        resourceGroup.add(resourceOverwriteRadio);
+        resourceGroup.add(resourceIgnoreRadio);
+        resourceGroup.add(resourcePromptRadio);
+
+        // Add action listeners
+        cancelButton.addActionListener(new ActionListener(){
+            public void actionPerformed(ActionEvent ev) {
+                thisWindowClosing();
+            }
+        });
+
+        importButton.addActionListener(new ActionListener(){
+            public void actionPerformed(ActionEvent ev) {
+                try {
+                    beginImport();
+                    gui.updateProjectTree();
+                    gui.updateDisplayTree();
+                    thisWindowClosing();
+                } catch (IOException ioe) {
+                    // Report the failure and leave the dialog open so the user can retry
+                    ioe.printStackTrace(System.err);
+                    JOptionPane.showMessageDialog(null,
+                        Resources.getTranslation("error") + "\n" + ioe.getLocalizedMessage(),
+                        Resources.getTranslation("error"), JOptionPane.ERROR_MESSAGE);
+                }
+            }
+        });
+
+        fileChooseButton.addActionListener(new ActionListener(){
+            public void actionPerformed(ActionEvent ev) {
+                chooseFile();
+            }
+        });
+
+        // Setup combo box with the groups of the first bundle
+        Bundle baseBundle = ((Bundle)rbm.getBundles().elementAt(0));
+        BundleGroup ungroupedGroup = baseBundle.getUngroupedGroup();
+        groupComboBox = new JComboBox(baseBundle.getGroupsAsVector());
+        int groupComboBoxCount = groupComboBox.getItemCount();
+        for (int selectedIndex = 0; selectedIndex < groupComboBoxCount; selectedIndex++) {
+            BundleGroup bundGroup = ((BundleGroup)groupComboBox.getItemAt(selectedIndex));
+            if (bundGroup.getName().equals(ungroupedGroup.getName())) {
+                // By default, use the ungrouped group. Probably named 'Ungrouped Items'.
+                groupComboBox.setSelectedIndex(selectedIndex);
+                break;
+            }
+        }
+
+        // Arrange components
+        groupBox.add(Box.createHorizontalGlue());
+        groupBox.add(insertGroupLabel);
+        groupBox.add(Box.createHorizontalStrut(5));
+        groupBox.add(groupComboBox);
+
+        defaultPanel2.add(groupBox, BorderLayout.NORTH);
+        defaultPanel2.add(markTranslatedCheck, BorderLayout.CENTER);
+        defaultPanel2.add(createGroupsCheck, BorderLayout.SOUTH);
+
+        fileBox.add(fileGeneratePopulateRadio);
+        fileBox.add(fileGenerateEmptyRadio);
+        fileBox.add(fileIgnoreRadio);
+        fileBox.add(filePromptRadio);
+
+        resourceBox.add(resourceOverwriteRadio);
+        resourceBox.add(resourceIgnoreRadio);
+        resourceBox.add(resourcePromptRadio);
+
+        defaultPanel.add(defaultPanel2);
+        filePanel.add(fileBox);
+        resourcePanel.add(resourceBox);
+
+        midBox.add(resourcePanel);
+        midBox.add(filePanel);
+        midBox.add(defaultPanel);
+
+        midPanel.add(midBox, BorderLayout.CENTER);
+
+        topInnerPanel.add(sourceLabel, BorderLayout.CENTER);
+        topInnerPanel.add(fileChooseButton, BorderLayout.EAST);
+
+        topPanel.add(titleLabel, BorderLayout.NORTH);
+        topPanel.add(topInnerPanel, BorderLayout.CENTER);
+
+        botPanel.add(cancelButton);
+        botPanel.add(importButton);
+
+        getContentPane().setLayout(new BorderLayout());
+        getContentPane().add(topPanel, BorderLayout.NORTH);
+        getContentPane().add(midPanel, BorderLayout.CENTER);
+        getContentPane().add(botPanel, BorderLayout.SOUTH);
+
+        pack();
+    }
+
+    /**
+     * Hides the options dialog and releases its native resources.
+     */
+    protected void thisWindowClosing() {
+        setVisible(false);
+        dispose();
+    }
+}
\ No newline at end of file
diff --git a/unicodetools/com/ibm/rbm/RBJavaExporter.java b/unicodetools/com/ibm/rbm/RBJavaExporter.java
new file mode 100644
index 0000000..c1648fc
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/RBJavaExporter.java
@@ -0,0 +1,203 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+import java.io.*;
+import java.awt.*;
+import java.awt.event.*;
+import javax.swing.*;
+import javax.swing.border.*;
+import java.util.*;
+
+/**
+ * An exporter plug-in class for RBManager. The resources exported here conform to
+ * the Java standard for Resource Bundles as specified in java.util.ListResourceBundle.
+ * The output files are compilable java files that are not associated with any
+ * package.
+ *
+ * @author Jared Jackson
+ * @see com.ibm.rbm.RBManager
+ */
+public class RBJavaExporter extends RBExporter {
+ private String packageName = null;
+ private boolean publicClass = true;
+ private boolean publicMethods = true;
+
+
+ public RBJavaExporter() {
+ super();
+
+ // Initialize the file chooser if necessary
+ if (chooser == null) {
+ chooser = new JFileChooser();
+ chooser.setFileFilter(new javax.swing.filechooser.FileFilter(){
+ public String getDescription() {
+ return "Java Source Files";
+ }
+ public boolean accept(File f) {
+ if (f.isDirectory()) return true;
+ if (f.getName().endsWith(".java") && f.getName().indexOf("_") < 0) return true;
+ return false;
+ }
+ });
+ }
+ }
+
+ public void export(RBManager rbm) throws IOException {
+ if (rbm == null) return;
+ // Open the additional Dialog
+ RBJavaExporterDialog parametersDialog = new RBJavaExporterDialog();
+ packageName = parametersDialog.getPackageName();
+ publicClass = parametersDialog.isClassPublic();
+ publicMethods = parametersDialog.isMethodsPublic();
+
+ // Open the Save Dialog
+ int ret_val = chooser.showSaveDialog(null);
+ if (ret_val != JFileChooser.APPROVE_OPTION) return;
+ // Retrieve basic file information
+ File file = chooser.getSelectedFile(); // The file(s) we will be working with
+ File directory = new File(file.getParent()); // The directory we will be writing to
+ String base_name = file.getName(); // The base name of the files we will write
+ if (base_name == null || base_name.equals("")) base_name = rbm.getBaseClass();
+ if (base_name.endsWith(".java")) base_name = base_name.substring(0,base_name.length()-5);
+
+ Vector bundle_v = rbm.getBundles();
+ for (int i=0; i < bundle_v.size(); i++) {
+ Bundle bundle = (Bundle)bundle_v.elementAt(i);
+ String base_enc = base_name;
+ if (bundle.encoding != null && !bundle.encoding.equals("")) base_enc = base_enc + "_" + bundle.encoding;
+ String file_name = base_enc + ".java";
+
+ StringBuffer buffer = new StringBuffer();
+ buffer.append("/* File: " + file_name + "\n");
+ buffer.append(" * Date: " + (new Date()) + "\n");
+ buffer.append(" * Comment: This file was generated automatically by RBManager" + "\n");
+ buffer.append(" */\n\n");
+ if (packageName != null) {
+ buffer.append("package " + packageName + ";\n\n");
+ }
+ buffer.append("import java.util.ListResourceBundle;\n\n");
+ buffer.append((publicClass ? "public " : "protected "));
+ buffer.append("class " + base_enc + " extends ListResourceBundle {\n");
+ buffer.append("\t" + (publicMethods ? "public" : "protected") + " Object[][] getContents() {\n");
+ buffer.append("\t\treturn contents;\n");
+ buffer.append("\t}\n");
+ buffer.append("\tprivate static final Object[][] contents = {\n");
+ buffer.append("\t// LOCALIZE THIS\n");
+
+ Vector group_v = bundle.getGroupsAsVector();
+ for (int j=0; j < group_v.size(); j++) {
+ BundleGroup group = (BundleGroup)group_v.elementAt(j);
+ Vector item_v = group.getItemsAsVector();
+ for (int k=0; k < item_v.size(); k++) {
+ BundleItem item = (BundleItem)item_v.elementAt(k);
+ buffer.append("\t\t{\"" + item.getKey() + "\", \"" + item.getTranslation() + "\"},\t// " + item.getComment() + "\n");
+ } // end for - k
+ } // end for - j
+
+ buffer.append("\t// END OF MATERIAL TO LOCALIZE\n");
+ buffer.append("\t};\n");
+ buffer.append("}");
+
+ // Write out the file
+ File write_file = new File(directory, file_name);
+ FileWriter writer = new FileWriter(write_file);
+ writer.write(buffer.toString());
+ writer.flush();
+ writer.close();
+ } // end for - i
+ }
+}
+
+ /**
+  * Modal dialog that collects the options for a java export: an optional
+  * package name and the visibility of the generated class and methods.
+  * The constructor builds the dialog and makes it visible.
+  */
+ class RBJavaExporterDialog extends JDialog {
+     JCheckBox packageCheck;
+     JRadioButton classPublicRadio;
+     JRadioButton classProtectedRadio;
+     JRadioButton methodsPublicRadio;
+     JRadioButton methodsProtectedRadio;
+     JTextField packageField;
+
+     public RBJavaExporterDialog() {
+         super(new JFrame(), Resources.getTranslation("dialog_title_export_java_options"), true);
+         initComponents();
+     }
+
+     /**
+      * Returns the trimmed package name, or null when the package check box
+      * is not selected or the field is blank.
+      */
+     public String getPackageName() {
+         if (!packageCheck.isSelected()) {
+             return null;
+         }
+         String text = packageField.getText();
+         if (text == null) {
+             return null;
+         }
+         String trimmed = text.trim();
+         return trimmed.equals("") ? null : trimmed;
+     }
+
+     /** Returns true when the "public" class radio button is selected. */
+     public boolean isClassPublic() {
+         return classPublicRadio.isSelected();
+     }
+
+     /** Returns true when the "public" methods radio button is selected. */
+     public boolean isMethodsPublic() {
+         return methodsPublicRadio.isSelected();
+     }
+
+     /** Hides and disposes of the dialog. */
+     private void handleClose() {
+         setVisible(false);
+         dispose();
+     }
+
+     /** Creates the widgets, lays them out, wires the OK button and shows the dialog. */
+     private void initComponents() {
+         Container content = getContentPane();
+         content.setLayout(new BorderLayout());
+         content.removeAll();
+
+         packageCheck = new JCheckBox(Resources.getTranslation("export_java_package"), false);
+         classPublicRadio = new JRadioButton(Resources.getTranslation("export_java_class_public"), true);
+         classProtectedRadio = new JRadioButton(Resources.getTranslation("export_java_class_protected"), false);
+         // NOTE(review): the method radios reuse the class label keys - confirm
+         // whether dedicated method labels were intended.
+         methodsPublicRadio = new JRadioButton(Resources.getTranslation("export_java_class_public"), true);
+         methodsProtectedRadio = new JRadioButton(Resources.getTranslation("export_java_class_protected"), false);
+         packageField = new JTextField();
+         packageField.setColumns(30);
+
+         JButton confirmButton = new JButton(Resources.getTranslation("OK"));
+         JLabel heading = new JLabel(Resources.getTranslation("export_java_title"), SwingConstants.LEFT);
+
+         JPanel buttonRow = new JPanel();
+         buttonRow.add(confirmButton);
+
+         content.add(heading, BorderLayout.NORTH);
+         content.add(buttonRow, BorderLayout.SOUTH);
+         content.add(buildOptionsPanel(), BorderLayout.CENTER);
+
+         confirmButton.addActionListener(new ActionListener() {
+             public void actionPerformed(ActionEvent ev) {
+                 handleClose();
+             }
+         });
+
+         makeExclusive(classPublicRadio, classProtectedRadio);
+         makeExclusive(methodsPublicRadio, methodsProtectedRadio);
+
+         pack();
+         setVisible(true);
+     }
+
+     /** Stacks the package row and the two radio button pairs inside a bevelled panel. */
+     private JPanel buildOptionsPanel() {
+         Box packageRow = Box.createHorizontalBox();
+         packageRow.add(packageCheck);
+         packageRow.add(packageField);
+
+         Box column = Box.createVerticalBox();
+         column.add(packageRow);
+         column.add(new JSeparator());
+         column.add(classPublicRadio);
+         column.add(classProtectedRadio);
+         column.add(new JSeparator());
+         column.add(methodsPublicRadio);
+         column.add(methodsProtectedRadio);
+
+         JPanel panel = new JPanel(new GridLayout(1,1));
+         panel.setBorder(BorderFactory.createBevelBorder(BevelBorder.RAISED));
+         panel.add(column);
+         return panel;
+     }
+
+     /** Puts the two radio buttons into one button group of their own. */
+     private static void makeExclusive(JRadioButton first, JRadioButton second) {
+         ButtonGroup pair = new ButtonGroup();
+         pair.add(first);
+         pair.add(second);
+     }
+ }
\ No newline at end of file
diff --git a/unicodetools/com/ibm/rbm/RBJavaImporter.java b/unicodetools/com/ibm/rbm/RBJavaImporter.java
new file mode 100644
index 0000000..c7d83bc
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/RBJavaImporter.java
@@ -0,0 +1,84 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+
+import java.io.*;
+
+import com.ibm.rbm.gui.RBManagerGUI;
+
+import java.util.*;
+import java.net.*;
+
+/**
+ * This is the super class for all importer plug-in classes. As of yet, there
+ * is little contained in this class.
+ *
+ * @author Jared Jackson
+ * @see com.ibm.rbm.RBManager
+ */
+public class RBJavaImporter extends RBImporter {
+
+ public RBJavaImporter(String title, RBManager rbm, RBManagerGUI gui) {
+ super(title, rbm, gui);
+ }
+
+ protected void setupFileChooser() {
+ chooser.setFileFilter(new javax.swing.filechooser.FileFilter(){
+ public boolean accept(File f) {
+ if (f.isDirectory()) return true;
+ if (f.getName().endsWith(".class") && f.getName().indexOf("_") < 0) return true;
+ return false;
+ }
+
+ public String getDescription() {
+ return Resources.getTranslation("import_java_file_description");
+ }
+ });
+ }
+
+ protected void beginImport() throws IOException {
+ super.beginImport();
+ ListResourceBundle base_lrb = null;
+ URLClassLoader urlLoader = null;
+ try {
+ File baseFile = getChosenFile();
+ URL baseURL = baseFile.toURL();
+ URL urls[] = new URL[1];
+ urls[0] = baseURL;
+ urlLoader = new URLClassLoader(urls);
+ String baseName = baseFile.getName();
+ baseName = baseName.substring(0, baseName.indexOf(".class"));
+
+ Class baseClass = urlLoader.loadClass(baseName);
+ base_lrb = (ListResourceBundle)baseClass.newInstance();
+ } catch (Exception e) {
+ RBManagerGUI.debugMsg(e.toString());
+ RBManagerGUI.debugMsg(e.getMessage());
+ e.printStackTrace(System.err);
+ }
+ if (base_lrb != null) {
+ Enumeration enum = base_lrb.getKeys();
+ while (enum.hasMoreElements()) {
+ String key = enum.nextElement().toString();
+ RBManagerGUI.debugMsg("Resource -> " + key + " = " + base_lrb.getString(key));
+ }
+ }
+ }
+}
+
+/*
+class myClassLoader extends ClassLoader {
+ public myClassLoader() {
+ super();
+ }
+
+ public Class myDefineClass(String name, byte array[], int off, int len) {
+ return super.defineClass(name, array, off, len);
+ }
+}
+*/
diff --git a/unicodetools/com/ibm/rbm/RBManager.java b/unicodetools/com/ibm/rbm/RBManager.java
new file mode 100644
index 0000000..c3324f9
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/RBManager.java
@@ -0,0 +1,946 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+import java.util.*;
+import java.io.*;
+import javax.swing.UIManager;
+import javax.swing.JOptionPane;
+
+import com.ibm.rbm.gui.RBManagerGUI;
+
+/**
+ * A utility class to aid in the process of updating the Natural Language Support of Tempus Fugit.
+ * This class scans the directory containing NLS files and checks the various languages found there
+ * for completeness, duplication of entry, and status of translation. The class can be instantiated
+ * through a constructor, or it can be run from the command line. For additional information on the
+ * command line results, see the <CODE>main</CODE> method.
+ *
+ * @author Jared Jackson
+ * @see com.ibm.rbm.RBManager
+ */
+public class RBManager {
+
+ // *** DATA ***
+ private Vector allBundleKeys; // A Vector of Strings with all defined NLS properties
+ private Vector bundles; // A Vector of NLSbundles, one for each language
+ private String currentUser; // The name of the person currently using the editor
+ private String baseClass; // The name of the base class of the active resource bundle
+ private File currentDirectory;
+
+ // *** CONSTRUCTORS ***
+
+ /**
+  * Starts the RBManager GUI using the stored preferences: look and feel,
+  * user name and locale. The default constructor is not publicly available.
+  * Any exception raised during start-up is printed and not propagated.
+  */
+ private RBManager() {
+     try {
+         // Look and Feel check; a failure here is deliberately ignored
+         try {
+             String lookAndFeel = Preferences.getPreference("lookandfeel");
+             if (lookAndFeel.length() > 0) {
+                 UIManager.setLookAndFeel(lookAndFeel);
+             }
+         } catch (Exception ignored) {
+             // Ignored
+         }
+
+         Resources.initBundle();
+         RBManagerGUI frame = new RBManagerGUI();
+         if (Preferences.getPreference("username").length() > 0) {
+             frame.setUser(Preferences.getPreference("username"));
+         }
+         if (Preferences.getPreference("locale").length() > 0) {
+             String localeName = Preferences.getPreference("locale");
+             String language = Resources.getLanguage(localeName);
+             String country = Resources.getCountry(localeName);
+             String variant = Resources.getVariant(localeName);
+             // Fall back to English when the language code is missing or too long
+             if (language == null || language.length() < 1 || language.length() > 3) {
+                 language = "en";
+             }
+             if (country == null) {
+                 country = "";
+             }
+             Locale chosen = (variant == null)
+                     ? new Locale(language, country)
+                     : new Locale(language, country, variant);
+             Resources.setLocale(chosen);
+         }
+         // Initialize the bundle again now that the locale may have changed
+         Resources.initBundle();
+         frame.initComponents();
+         frame.setVisible(true);
+     } catch (Exception failure) {
+         failure.printStackTrace();
+     }
+ }
+
+ /**
+  * Creates an entirely blank RBManager holding a single base Bundle with one
+  * default group. Only the base class name is defined; the user is set to
+  * "Unknown" and all other properties need to be defined.
+  *
+  * @param baseClassName The name of the base class of the resource bundle
+  */
+ public RBManager(String baseClassName) {
+     this.allBundleKeys = new Vector();
+     this.bundles = new Vector();
+     this.currentUser = "Unknown";
+     this.baseClass = baseClassName;
+     this.currentDirectory = new File("");
+
+     // The base bundle starts out with a default group
+     Bundle rootBundle = new Bundle("");
+     rootBundle.addBundleGroup("Ungrouped Items", "These are resource items that have not been assigned a group");
+     this.bundles.addElement(rootBundle);
+ }
+
+ /**
+  * This is the standard constructor for RBManager. It is constructed from the root of a resource bundle.
+  * In the current implementation, each file is parsed separately starting with the base class file (root).
+  * In this implementation, the lookup keys are represented to the user as they appear in the files. The
+  * translation values however are translated according to the basic rules defined in java.util.Properties.
+  * Thus in the key, the user may see '\"' when in the value it would have been converted to '"'. This
+  * translation is reversed when saving the resource bundle.
+  * @param mainFile The base class file of the resource bundle to be read
+  * @throws FileNotFoundException if a bundle file cannot be opened, or if the main file
+  *         was not generated by RBManager
+  * @throws IOException if a bundle file cannot be read
+  */
+ public RBManager(File mainFile) throws FileNotFoundException, IOException {
+     init();
+
+     // getParent() is null for a bare file name; fall back to the absolute parent
+     String parentPath = mainFile.getParent();
+     currentDirectory = (parentPath != null) ? new File(parentPath) : mainFile.getAbsoluteFile().getParentFile();
+
+     // Work out the base class name and the suffix shared by the language files
+     String mainName = mainFile.getName();
+     String NLSbaseClass = null;
+     String NLSpostfix = null;
+     int firstDot = mainName.indexOf(".");
+     if (firstDot >= 0) {
+         NLSbaseClass = mainName.substring(0, firstDot);
+         NLSpostfix = ".properties";
+     } else {
+         NLSbaseClass = mainName;
+         NLSpostfix = "";
+     }
+     baseClass = NLSbaseClass;
+
+     File resDir = currentDirectory;
+     Vector encodings = collectEncodings(resDir, mainName, NLSbaseClass, NLSpostfix);
+
+     // Read in the entries from the main file
+     Bundle dict = new Bundle((String)encodings.elementAt(0));
+     bundles.addElement(dict);
+     parseBundleFile(mainFile, dict, true);
+
+     // Now that we have parsed the entire main language file, populate the allNLSKey set with the dictionary keys
+     allBundleKeys = new Vector();
+     Enumeration keyEnum = ((Bundle)bundles.elementAt(0)).allItems.keys();
+     while (keyEnum.hasMoreElements()) {
+         allBundleKeys.addElement(keyEnum.nextElement());
+     }
+
+     // Now go through all of the other languages
+     for (int i = 1; i < encodings.size(); i++) {
+         String encoding = (String)encodings.elementAt(i);
+         if (encoding.equals("kr")) continue; // I can't handle double byte character sets yet
+         File tempFile = new File(resDir, NLSbaseClass + "_" + encoding + NLSpostfix);
+         dict = new Bundle(encoding);
+         bundles.addElement(dict);
+         parseBundleFile(tempFile, dict, false);
+     } // end for loop through languages
+     // Add this opened file to our recent files
+     Preferences.addRecentFilePreference(mainFile.getName(), mainFile.getAbsolutePath());
+ } // end RBManager()
+
+ /**
+  * Lists the encodings of the language files found next to the main file.
+  * Element 0 is always the empty encoding of the main file itself. When the
+  * directory is not available only that element is returned.
+  */
+ private static Vector collectEncodings(File resDir, String mainName, String NLSbaseClass, String NLSpostfix) {
+     Vector encodings = new Vector();
+     encodings.addElement("");
+     if (resDir == null || !resDir.isDirectory()) return encodings;
+     String[] names = resDir.list();
+     if (names == null) return encodings;
+
+     // A main file without an extension has no postfix to strip
+     int lastDot = mainName.lastIndexOf(".");
+     String filePrefix = (lastDot >= 0) ? mainName.substring(0, lastDot) : mainName;
+     String filePostfix = (lastDot >= 0) ? mainName.substring(lastDot) : "";
+
+     for (int i = 0; i < names.length; i++) {
+         String name = names[i];
+         if (name.equals(mainName)) continue;
+         // Starts with the base class name and ends in proper suffix
+         boolean suffixMatches = name.endsWith(NLSpostfix)
+                 || name.endsWith(NLSpostfix.toUpperCase()) || NLSpostfix.equals("");
+         if (!name.startsWith(NLSbaseClass) || !suffixMatches) continue;
+         if (name.length() <= NLSbaseClass.length()) continue;
+         // Base name is followed by . or _
+         char separator = name.charAt(NLSbaseClass.length());
+         RBManagerGUI.debugMsg("Character is: " + separator);
+         if (separator != '.' && separator != '_') continue;
+         int start = filePrefix.length() + 1;
+         int end = (filePostfix.length() == 0) ? name.length() : name.indexOf(filePostfix);
+         // Skip names from which no encoding can be cut out
+         if (end < start) continue;
+         encodings.addElement(name.substring(start, end));
+     }
+     return encodings;
+ }
+
+ /**
+  * Parses one bundle file into the given Bundle. The file is read twice: once
+  * through java.util.Properties for the translated values and once line by
+  * line for the keys and the '@' descriptors held in comments. Both streams
+  * are closed before the method returns.
+  * @param requireRBManagerHeader when true, the first non-blank line must start
+  *        with "# @file"; otherwise an error dialog is shown and a
+  *        FileNotFoundException is thrown
+  */
+ private void parseBundleFile(File file, Bundle dict, boolean requireRBManagerHeader)
+         throws FileNotFoundException, IOException {
+     // Load the java readable values from the file
+     Properties p = new Properties();
+     FileInputStream propertyStream = new FileInputStream(file);
+     try {
+         p.load(propertyStream);
+     } finally {
+         propertyStream.close();
+     }
+
+     // Set up the first group in case there are NLS items which were not assigned to a group
+     BundleGroup group = new BundleGroup(dict, "Ungrouped Items");
+     group.setComment("NLS Items which were not initially assigned to a group");
+     dict.addBundleGroup(group);
+     BundleItem item = new BundleItem(group,null,null);
+
+     BufferedReader br = new BufferedReader(new FileReader(file));
+     try {
+         boolean headerChecked = !requireRBManagerHeader;
+         String line;
+         while ((line = br.readLine()) != null) {
+             // Test the first non-blank line to make sure this is a file that was generated by RBManager
+             if (!headerChecked && !line.trim().equals("")) {
+                 headerChecked = true;
+                 if (!line.startsWith("# @file")) {
+                     // Not generated by RBManager
+                     JOptionPane.showMessageDialog(null,
+                         Resources.getTranslation("error_not_rbmanager_format") + "\n" + Resources.getTranslation("error_suggest_import_properties"),
+                         Resources.getTranslation("dialog_title_error_not_rbmanager_format"), JOptionPane.ERROR_MESSAGE);
+                     throw new FileNotFoundException("Improper format for file: " + file.getName());
+                 }
+             }
+             String commentLine = null;
+             // Grab text following the # sign
+             int hash = line.indexOf("#");
+             if (hash >= 0) {
+                 commentLine = line.substring(hash+1,line.length());
+                 line = line.substring(0,hash);
+             }
+             if (commentLine != null && commentLine.trim().length() > 0) {
+                 // Process any information made available in comment '@' information
+                 Hashtable descriptors = getDescriptors(null,commentLine);
+                 if (descriptors != null) {
+                     group = applyDescriptors(descriptors, dict, group, item);
+                 }
+             } // end check of comment line
+             if (line.trim().length() < 1) continue;
+
+             // Grab the name and value (translation) from the line
+             String key = extractKey(line);
+             item.setKey(key);
+             String translation = p.getProperty(key);
+             if (translation == null || translation.equals(""))
+                 item.setTranslation(line.substring(line.indexOf("=")+1,line.length()).trim());
+             else item.setTranslation(translation);
+
+             dict.addBundleItem(item);
+             // Descriptors seen from here on describe the next item
+             item = new BundleItem(group,null,null);
+         } // end while - next line
+     } finally {
+         br.close();
+     }
+ }
+
+ /**
+  * Applies the file, group, item and lookup tags of one descriptor table.
+  * A "group" tag starts a new group, which also becomes the parent of the
+  * pending item.
+  * @return the group in effect afterwards
+  */
+ private static BundleGroup applyDescriptors(Hashtable descriptors, Bundle dict, BundleGroup group, BundleItem item) {
+     Object o;
+     // File tags
+     o = descriptors.get("file"); if (o != null) dict.name = ((String) o);
+     o = descriptors.get("fileComment"); if (o != null) dict.comment = ((String) o);
+     o = descriptors.get("fileLanguage"); if (o != null) dict.language = ((String) o);
+     o = descriptors.get("fileCountry"); if (o != null) dict.country = ((String) o);
+     o = descriptors.get("fileVariant"); if (o != null) dict.variant = ((String) o);
+     o = descriptors.get("fileManager"); if (o != null) dict.manager = ((String) o);
+
+     // Group tags
+     o = descriptors.get("group");
+     if (o != null) {
+         group = new BundleGroup(dict, (String)o);
+         item.setParentGroup(group);
+         dict.addBundleGroup(group);
+     }
+     o = descriptors.get("groupComment"); if (o != null) group.setComment((String) o);
+
+     // Item tags
+     o = descriptors.get("comment"); if (o != null) item.setComment((String) o);
+     o = descriptors.get("translated"); if (o != null) item.setTranslated(((String) o).equalsIgnoreCase("true"));
+     o = descriptors.get("creator"); if (o != null) item.setCreator((String) o);
+     o = descriptors.get("modifier"); if (o != null) item.setModifier((String) o);
+     o = descriptors.get("created"); if (o != null) item.setCreatedDate((String) o);
+     o = descriptors.get("modified"); if (o != null) item.setModifiedDate((String) o);
+
+     // Lookup tags (e.g. {_#_} _description_)
+     Enumeration tags = descriptors.keys();
+     while (tags.hasMoreElements()) {
+         String tag = (String)tags.nextElement();
+         if (tag.startsWith("{")) {
+             if (tag.indexOf("}") < 0) continue;
+             String lookup = tag.substring(1,tag.indexOf("}"));
+             item.getLookups().put(lookup, descriptors.get(tag));
+         }
+     }
+     return group;
+ }
+
+ /**
+  * Returns the text of the line up to the first '=', ':', space or tab that
+  * follows the first non-blank character. Leading white space is kept, and
+  * the empty string is returned when no such separator is found.
+  */
+ private static String extractKey(String line) {
+     char array[] = line.toCharArray();
+     int breakpoint = 0;
+     boolean started = false;
+     for (int i=0; i < array.length; i++) {
+         if (!started && array[i] != ' ' && array[i] != '\t') started = true;
+         if (started && (array[i] == '=' || array[i] == ':' || array[i] == ' ' || array[i] == '\t')) {
+             breakpoint = i;
+             break;
+         }
+     }
+     return String.valueOf(array,0,breakpoint);
+ }
+
+ // *** METHODS ***
+
+ /**
+ * Main
+ */
+
+ public static void main(String args[]) {
+ // Make sure the user specified a path
+ if (args.length < 1) {
+ new RBManager();
+ return;
+ }
+ } // main
+
+ public String toString() { return baseClass; }
+
+ /**
+  * Writes every bundle to its properties file in the current directory. The
+  * file is named after the base class, followed by "_" and the encoding for
+  * bundles that have one. Afterwards the base file is recorded in the recent
+  * files preference.
+  * @throws IOException if one of the files cannot be written
+  */
+
+ public void writeToFile() throws IOException {
+     for (int i = 0; i < bundles.size(); i++) {
+         Bundle bundle = (Bundle)bundles.elementAt(i);
+         File outputFile = new File(currentDirectory, baseClass +
+                 ((bundle.encoding == null || bundle.encoding.equals("")) ? "" : "_" + bundle.encoding) +
+                 ".properties");
+         FileWriter fw = new FileWriter(outputFile);
+         // Close the writer even when writing the contents fails
+         try {
+             bundle.writeContents(fw);
+             fw.flush();
+         } finally {
+             fw.close();
+         }
+     }
+     // In case this is a newly created bundle or the location has changed recently, update the recent files, preference
+     Preferences.addRecentFilePreference(baseClass + ".properties", currentDirectory.getAbsolutePath() + File.separator +
+             baseClass + ".properties");
+ }
+
+ /**
+  * Calling this method removes a resource from the resource bundle. This method does not permanently
+  * erase the file containing the resources at this encoding, however any changes or saves that take
+  * place once this file has been removed will not be reflected in this hidden file. To restore the resource,
+  * the bundle will have to be recreated. (This last point may change)
+  * @param encoding The encoding of the bundle to remove; only the first match is removed
+  */
+
+ public void hideResource(String encoding) {
+     for (int i=0; i < bundles.size(); i++) {
+         Bundle bundle = (Bundle)bundles.elementAt(i);
+         // A bundle without an encoding is treated as the base ("") bundle
+         String bundleEncoding = (bundle.encoding == null) ? "" : bundle.encoding;
+         if (bundleEncoding.equals(encoding)) {
+             bundles.removeElement(bundle);
+             break;
+         }
+     }
+ }
+
+ /**
+  * Erases permanently one of the resource files and removes its bundle from this manager.
+  * Be careful about calling this method; there is nothing you can do once a file is erased.
+  * @param encoding The encoding of the bundle whose file is erased
+  * @throws IOException if the file could not be deleted; the bundle is then kept
+  */
+
+ public void eraseFile(String encoding) throws IOException {
+     for (int i = 0; i < bundles.size(); i++) {
+         Bundle bundle = (Bundle)bundles.elementAt(i);
+         // A bundle without an encoding is treated as the base ("") bundle
+         String bundleEncoding = (bundle.encoding == null) ? "" : bundle.encoding;
+         if (!bundleEncoding.equals(encoding)) continue;
+         File outputFile = new File(currentDirectory, baseClass +
+                 (bundleEncoding.equals("") ? "" : "_" + bundleEncoding) +
+                 ".properties");
+         boolean success = outputFile.delete();
+         if (!success) throw new IOException(Resources.getTranslation("error_deletion_not_possible"));
+         // This is the first bundle with the encoding, i.e. the one to hide
+         bundles.removeElementAt(i);
+         break;
+     }
+ }
+
+ /**
+  * Writes only one of the resource files to the file system. This file is specified by the encoding parameter;
+  * the empty encoding always selects the first (base) bundle. Afterwards the base file is recorded in the
+  * recent files preference, whether or not a bundle matched.
+  * @param encoding The encoding of the bundle to write
+  * @throws IOException if the file cannot be written
+  */
+
+ public void writeToFile(String encoding) throws IOException {
+     for (int i = 0; i < bundles.size(); i++) {
+         Bundle bundle = (Bundle)bundles.elementAt(i);
+         // A bundle without an encoding is treated as the base ("") bundle
+         String bundleEncoding = (bundle.encoding == null) ? "" : bundle.encoding;
+         if (bundleEncoding.equals(encoding) || (i==0 && "".equals(encoding))) {
+             File outputFile = new File(currentDirectory, baseClass +
+                     (bundleEncoding.equals("") ? "" : "_" + bundleEncoding) +
+                     ".properties");
+             FileWriter fw = new FileWriter(outputFile);
+             // Close the writer even when writing the contents fails
+             try {
+                 bundle.writeContents(fw);
+                 fw.flush();
+             } finally {
+                 fw.close();
+             }
+             break;
+         }
+     }
+     // In case this is a newly created bundle or the location has changed recently, update the recent files, preference
+     Preferences.addRecentFilePreference(baseClass + ".properties", currentDirectory.getAbsolutePath() + File.separator +
+             baseClass + ".properties");
+ }
+
+ /**
+  * Given a BundleItem and some properties to change for that item, this method first checks to make sure the passed
+  * item is valid and if it is, the properties of that item are changed to reflect those passed in as parameters to this
+  * method. The change is applied to the item with the same key in each bundle; a comment or translation
+  * is only replaced where it still equals the value held by the passed item.
+  * @param item The item to edit
+  * @param name The new key of the item, must not be empty
+  * @param value The new translation
+  * @param groupName The name of the group the item should belong to, must not be empty
+  * @param comment The new comment
+  * @param lookups The new lookups
+  * @return true if the BundleItem was valid and updateable, false if otherwise (in this case no changes were made).
+  */
+
+ public boolean editItem(BundleItem item, String name, String value, String groupName, String comment, Hashtable lookups) {
+     if (name == null || name.equals("") || groupName == null || groupName.equals("") || item == null) return false;
+     String oldName = item.getKey();
+     String oldComment = item.getComment();
+     String oldValue = item.getTranslation();
+     // Loop through the bundles
+     for (int i = 0; i < bundles.size(); i++) {
+         Bundle bundle = (Bundle)bundles.elementAt(i);
+         BundleItem oldItem = (BundleItem)bundle.allItems.get(oldName);
+         // NOTE(review): this stops at the first bundle lacking the item, leaving
+         // later bundles untouched - confirm that 'continue' was not intended.
+         if (oldItem == null) break;
+         if (!oldName.equals(name)) {
+             // A new key: allItems is keyed by name, so the stale entry has
+             // to be removed by its old key, not by the item itself
+             oldItem.setKey(name);
+             bundle.allItems.remove(oldName);
+             bundle.allItems.put(oldItem.getKey(), oldItem);
+         }
+         if (oldItem.getComment() == null || oldItem.getComment().equals(oldComment)) oldItem.setComment(comment);
+         if (oldItem.getTranslation().equals(oldValue)) oldItem.setTranslation(value);
+         oldItem.setLookups(lookups);
+         if (!oldItem.getParentGroup().getName().equals(groupName)) {
+             // A new group
+             oldItem.getParentGroup().removeBundleItem(oldItem.getKey());
+             BundleGroup bg = bundle.getBundleGroup(groupName);
+             if (bg == null) bg = bundle.getUngroupedGroup();
+             oldItem.setParentGroup(bg);
+             bg.addBundleItem(oldItem);
+         }
+     }
+     return true;
+ }
+
+ /**
+ * Attempts to create a new item in each of the language files. The method first checks the base Resource Bundle
+ * to make sure that the item name does not all ready exist. If it does exist the item is not created.
+ * @param name The unique key of the item
+ * @param value The translation of the item for the base class
+ * @param groupName The group name, should all ready exist in the base class
+ * @param comment An optional comment to be added to the item, can be <CODE>null</CODE>
+ * @return An error response. If the creation was successful <CODE>true</CODE> is returned, if there was an error <CODE>false</CODE> is returned.
+ */
+
+ public boolean createItem(String name, String value, String groupName, String comment, Hashtable lookups) {
+     if (name == null || name.equals("")) return false;
+     if (groupName == null || groupName.equals("")) return false;
+     Bundle base = (Bundle)bundles.firstElement();
+     if (base.allItems.containsKey(name)) return false;
+     BundleGroup baseGroup = null;
+     for (int g = 0; baseGroup == null && g < base.getGroupCount(); g++) {
+         BundleGroup candidate = base.getBundleGroup(g);
+         if (candidate.getName().equals(groupName)) baseGroup = candidate;
+     }
+     if (baseGroup == null) return false;
+     // The first (base) bundle receives the item marked as translated
+     BundleItem baseItem = new BundleItem(baseGroup, name, value);
+     baseItem.setTranslated(true);
+     baseItem.setCreator(currentUser);
+     baseItem.setModifier(currentUser);
+     baseItem.setComment(comment);
+     base.allItems.put(name, baseItem);
+     baseGroup.addBundleItem(baseItem);
+     if (lookups != null) baseItem.setLookups(lookups);
+     // Every other bundle receives its own item, queued as untranslated
+     for (int b = 1; b < bundles.size(); b++) {
+         Bundle other = (Bundle)bundles.elementAt(b);
+         BundleGroup target = null;
+         for (int g = 0; target == null && g < other.getGroupCount(); g++) {
+             BundleGroup candidate = other.getBundleGroup(g);
+             if (candidate.getName().equals(groupName)) target = candidate;
+         }
+         if (target == null) {
+             target = new BundleGroup(other, groupName);
+             other.addBundleGroup(target);
+         }
+         BundleItem copy = new BundleItem(target, name, value);
+         copy.setCreator(currentUser);
+         copy.setModifier(currentUser);
+         copy.setComment(comment);
+         if (lookups != null) copy.setLookups(lookups);
+         other.allItems.put(name, copy);
+         other.addUntranslatedItem(copy);
+         target.addBundleItem(copy);
+     }
+     return true;
+ }
+
+ /**
+ * Attempts to create a new group in each of the language files. The method first checks the base Resource Bundle
+ * to make sure that the group name does not already exist. If it does exist the group is not created.
+ * @param groupName The unique group name to be created
+ * @param groupComment An optional comment to be added to the group, can be <CODE>null</CODE>
+ * @return An error response. If the creation was successful <CODE>true</CODE> is returned, if there was an error <CODE>false</CODE> is returned.
+ */
+ public boolean createGroup(String groupName, String groupComment) {
+     boolean nameMissing = (groupName == null) || groupName.equals("");
+     if (nameMissing) return false;
+     // The first bundle is the reference: a group it already has is never recreated
+     if (((Bundle)bundles.firstElement()).hasGroup(groupName)) return false;
+
+     // Give every bundle, the first one included, its own group object
+     final boolean hasComment = groupComment != null;
+     int idx = 0;
+     while (idx < bundles.size()) {
+         Bundle owner = (Bundle)bundles.elementAt(idx);
+         BundleGroup created = new BundleGroup(owner, groupName);
+         if (hasComment) created.setComment(groupComment);
+         owner.addBundleGroup(created);
+         idx++;
+     }
+     return true;
+ }
+
+ /**
+ * Removes a group and all of the items within that group from the various
+ * Resource Bundles known to the system. This method removes the group from
+ * the protected vector of groups, then removes all items in that group from
+ * the protected vector of untranslated items, and the protected hashtable of
+ * all items.
+ */
+
+ public void deleteGroup(String groupName) {
+     if (groupName != null) {
+         // Ask every known bundle in turn to drop the group of that name
+         for (int idx = 0; idx < bundles.size(); idx++) {
+             ((Bundle)bundles.elementAt(idx)).removeGroup(groupName);
+         }
+     }
+ }
+
+ /**
+ * Removes the resource item of the given name from each of the resource bundles that the system
+ * knows about. For every bundle the item is removed from the untranslated items, then from the
+ * hashtable of all items, and finally from its parent group.
+ *
+ * @param itemName The key of the item to delete; <CODE>null</CODE> is ignored
+ */
+ public void deleteItem(String itemName) {
+     if (itemName == null) return;
+     for (int i=0; i < bundles.size(); i++) {
+         Bundle bundle = (Bundle)bundles.elementAt(i);
+         bundle.removeUntranslatedItem(itemName);
+
+         // allItems maps item name -> BundleItem (see the put() calls in createItem), so the entry
+         // is fetched directly instead of enumerating the table while removing from it.
+         BundleItem item = (BundleItem)bundle.allItems.get(itemName);
+         if (item == null) continue;
+         // Hashtable.remove() expects the key. The previous code passed the BundleItem value,
+         // which is not a key of this table, so the entry was never actually removed.
+         bundle.allItems.remove(itemName);
+         // An item may have no parent group (the properties importer checks for that case too)
+         if (item.getParentGroup() != null) {
+             item.getParentGroup().removeBundleItem(item.getKey());
+         }
+     }
+ }
+
+ /**
+ * Looks through the resources contained in the bundle for a resource of the given encoding. Note that this
+ * search is case sensitive.
+ * @return True if the encoding exists as one of the resource files, false otherwise
+ */
+
+ public boolean hasResource(String encoding) {
+     // Scan the bundles until one carries exactly this encoding
+     boolean found = false;
+     for (int idx = 0; !found && idx < bundles.size(); idx++) {
+         found = ((Bundle)bundles.elementAt(idx)).encoding.equals(encoding);
+     }
+     return found;
+ }
+
+ /**
+ * Attempts to create a new resource file with the given encoding. The method first checks the base Resource Bundle
+ * to make sure that encoding does not already exist. If it does exist the resource file is not created.
+ * @param title An optional, quick title for the file, can be <CODE>null</CODE>
+ * @param comment An optional comment to be added to the resource, can be <CODE>null</CODE>
+ * @param manager The name of the person responsible for this resource, can be <CODE>null</CODE>
+ * @param encoding The proper encoding for the resource. Must be of form 'language', 'language_country', or 'language_country_variant'
+ * @param language A more formal name for the language (e.g. 'English', 'Deutsch', etc.), can be <CODE>null</CODE>
+ * @param country A more formal name for the country described by the resource, can be <CODE>null</CODE>
+ * @param variant A more formal name for the variant described by the resource, can be <CODE>null</CODE>
+ * @param copyValues An indication of whether or not to populate the resource with the items in the base class
+ * @return An error response. If the creation was successful <CODE>true</CODE> is returned, if there was an error <CODE>false</CODE> is returned.
+ */
+
+ public boolean createResource(String title, String comment, String manager, String encoding,
+         String language, String country, String variant, boolean copyValues) {
+     if (encoding == null || encoding.equals("") || encoding.startsWith("_")) return false;
+     // Check to see if the encoding exists
+     if (hasResource(encoding)) return false;
+     // Create the resource
+     Bundle bundle = new Bundle(encoding);
+     bundle.name = title;
+     bundle.comment = comment;
+     bundle.manager = manager;
+     bundle.language = language;
+     bundle.country = country;
+     bundle.variant = variant;
+
+     // Create a default group
+     bundle.addBundleGroup("Ungrouped Items", "These are resource items that have not been assigned a group");
+     // Optionally mirror every group and item of the first (base) bundle, queued as untranslated
+     if (copyValues) {
+         Bundle mainBundle = (Bundle)bundles.firstElement();
+         for (int i=0; i < mainBundle.getGroupCount(); i++) {
+             BundleGroup mainGroup = mainBundle.getBundleGroup(i);
+             BundleGroup bg = new BundleGroup(bundle,mainGroup.getName());
+             bg.setComment(mainGroup.getComment());
+             bundle.addBundleGroup(bg);
+             for (int j=0; j < mainGroup.getItemCount(); j++) {
+                 BundleItem mainItem = mainGroup.getBundleItem(j);
+                 BundleItem item = new BundleItem(bg, mainItem.getKey(), mainItem.getTranslation());
+                 item.setComment(mainItem.getComment());
+                 item.setCreator(mainItem.getCreator());
+                 item.setModifier(mainItem.getModifier());
+                 item.setLookups(new Hashtable());
+                 // TODO: This should be done in the Bundle class
+                 Enumeration lookupNames = mainItem.getLookups().keys();
+                 while (lookupNames.hasMoreElements()) {
+                     String name = (String)lookupNames.nextElement();
+                     String value = (String)mainItem.getLookups().get(name);
+                     item.getLookups().put(new String(name), new String(value));
+                 }
+                 bg.addBundleItem(item);
+                 bundle.addUntranslatedItem(item);
+             }
+         }
+     }
+     // NOTE(review): unlike createItem, the copied items are not put into bundle.allItems here -- confirm
+     bundles.addElement(bundle);
+
+     return true;
+ }
+
+ /**
+ * Returns the number of duplicate NLS entries
+ * @return The size of the first bundle's <CODE>duplicates</CODE> vector; other bundles are not consulted
+ */
+ public int getNumberDuplicates() {
+ return ((Bundle)bundles.firstElement()).duplicates.size();
+ }
+
+ /**
+ * Returns a single string with a comma delimited listing of all duplicate entries found in the NLS resources.
+ * NOTE(review): listStrings() keeps only String elements; confirm the duplicates vector really holds Strings.
+ */
+ public String getDuplicatesListing() {
+ return listStrings(getDuplicatesListingVector());
+ }
+
+ /**
+ * Returns the first bundle's <CODE>duplicates</CODE> vector itself, not a copy.
+ * @return The Vector of duplicates, described so far as holding BundleItems -- TODO confirm the element type
+ */
+ public Vector getDuplicatesListingVector() {
+ return ((Bundle)bundles.firstElement()).duplicates;
+ }
+
+ /**
+ * A useful debugging method that lists the various BundleGroup names in a String.
+ * @return The group names of the first bundle, separated by ", "
+ */
+ public String getGroupListing() {
+ return listStrings(getGroupListingVector());
+ }
+
+ /**
+ * Collects the name of every BundleGroup found in the first bundle.
+ * @return A new Vector of group name Strings, in the order the bundle reports its groups
+ */
+ public Vector getGroupListingVector() {
+     Vector names = new Vector();
+     Bundle first = (Bundle)bundles.firstElement();
+     int idx = 0;
+     while (idx < first.getGroupCount()) {
+         names.addElement(first.getBundleGroup(idx++).getName());
+     }
+     return names;
+ }
+
+ /**
+ * Returns the total number of languages that the system seems to support
+ * @return The number of bundles currently held, the first (base) bundle included
+ */
+ public int getNumberLanguages() {
+ return bundles.size();
+ }
+
+ /**
+ * Returns a single string comprised of a comma delimited listing of all languages the system seems to support
+ * @return The labels produced by getLanguageListingVector(), separated by ", "
+ */
+ public String getLanguageListing() {
+ return listStrings(getLanguageListingVector());
+ }
+
+ /**
+ * Builds a display label for every bundle: its language, country and variant joined by
+ * spaces or, when that is blank, its encoding ("Base Resource Bundle" if that is blank too).
+ * @return A new Vector holding one label String per bundle
+ */
+ public Vector getLanguageListingVector() {
+     Vector labels = new Vector();
+     for (int idx = 0; idx < bundles.size(); idx++) {
+         Bundle current = (Bundle)bundles.elementAt(idx);
+         StringBuffer label = new StringBuffer();
+         if (current.language != null) label.append(current.language);
+         if (current.country != null) label.append(" ").append(current.country);
+         if (current.variant != null) label.append(" ").append(current.variant);
+         String text = label.toString();
+         if (text.trim().equals("")) text = (current.encoding.trim().equals("") ? "Base Resource Bundle" : current.encoding);
+         labels.addElement(text);
+     }
+     return labels;
+ }
+
+ /**
+ * Returns the number of translations contained across all language files
+ * NOTE(review): this is simply allBundleKeys.size(); confirm that vector tracks every translation.
+ */
+ public int getNumberTotalTranslations() {
+ return allBundleKeys.size();
+ }
+
+ /**
+ * Returns the number of BundleGroups in the bundle.
+ * @return The group count of the first bundle; other bundles are not consulted
+ */
+ public int getNumberGroups() {
+ return ((Bundle)bundles.firstElement()).getGroupCount();
+ }
+
+ /**
+ * Returns the name of the user currently using the editor
+ * @return The current user name; init() defaults it to "Unknown"
+ */
+ public String getUser() {
+ return currentUser;
+ }
+
+ /**
+ * Sets the name of the user currently using the editor
+ * @param user The user name; createItem() records it as creator and modifier of new items
+ */
+ public void setUser(String user) {
+ currentUser = user;
+ }
+
+ /**
+ * Sets the name of the base class associated with this resource bundle
+ * @param baseClassName The base class name; getBaseFile() appends ".properties" to it
+ */
+ public void setBaseClass(String baseClassName) {
+ baseClass = baseClassName;
+ }
+
+ /**
+ * Sets the directory in the file system in which this resource bundle is to be
+ * saved and retrieved. The call is silently ignored unless the argument is an existing directory.
+ * @param directory The new working directory; passing <CODE>null</CODE> throws a NullPointerException
+ */
+ public void setFileDirectory(File directory) {
+ if (directory.isDirectory()) currentDirectory = directory;
+ }
+
+ /** Returns the base class name if known, or "Unknown Base Class" otherwise. */
+ public String toString() {
+     return (baseClass == null ? "Unknown Base Class" : baseClass);
+ }
+ /** Misspelled legacy name of {@link #toString()}, kept so that existing callers still compile. */
+ public String toSring() { return toString(); }
+
+ /**
+ * Returns the base class name or null if it does not exist.
+ * @return The current value of the <CODE>baseClass</CODE> field, unmodified
+ */
+ public String getBaseClass() {
+ return baseClass;
+ }
+
+ /**
+ * Returns the internal Vector of Bundle objects, one for each language; it is not a copy.
+ */
+ public Vector getBundles() {
+ return bundles;
+ }
+
+ /**
+ * Return a bundle from a locale
+ *
+ * @param locale The encoding of the wanted bundle, compared case sensitively
+ * @return The requested resource bundle, or <CODE>null</CODE> if no bundle has that encoding
+ */
+ public Bundle getBundle(String locale) {
+     // Cheap pre-check; leave at once when no bundle carries this encoding
+     if (!hasResource(locale)) return null;
+     int idx = 0;
+     while (idx < bundles.size()) {
+         Bundle candidate = (Bundle)bundles.elementAt(idx++);
+         if (candidate.encoding.equals(locale)) return candidate;
+     }
+     // Only reached if the bundles changed between the two scans
+     return null;
+ }
+
+ /**
+ * Returns the file that is the base class file for the resource bundle.
+ * @return <CODE>baseClass + ".properties"</CODE> resolved against the current directory
+ */
+ public File getBaseFile() {
+ return new File(currentDirectory,baseClass + ".properties");
+ }
+
+ // Return a comma delimited string of the String elements of a vector; skipped non-Strings leave no stray ", "
+ private String listStrings(Vector v) {
+     StringBuffer joined = new StringBuffer();
+     boolean needSeparator = false;
+     for (int i = 0; i < v.size(); i++) {
+         Object o = v.elementAt(i);
+         if (!(o instanceof String)) continue;
+         joined.append(needSeparator ? ", " : "").append((String)o);
+         needSeparator = true;
+     }
+     return joined.toString();
+ }
+
+ // Init - called before any construction; creates empty key and bundle vectors and defaults the user to "Unknown"
+ private void init() {
+ allBundleKeys = new Vector();
+ bundles = new Vector();
+ currentUser = "Unknown";
+ }
+
+ // Return a hashtable of the tags in a comment line (i.e. the text after each '@' character) and their values
+ private Hashtable getDescriptors(Hashtable result, String line) {
+     String rest = line;
+     // Each pass consumes one "@tag value" pair; stop once no '@' is left
+     while (rest != null && rest.length() > 0 && rest.indexOf("@") >= 0) {
+         // The table is only allocated once a first '@' has been seen
+         if (result == null) result = new Hashtable();
+         // Drop everything up to and including the '@'
+         rest = rest.substring(rest.indexOf("@") + 1);
+         // The tag name ends at the first space (tabs do not count); without one the tag is ignored
+         int space = rest.indexOf(" ");
+         if (space < 0) return result;
+         String tag = rest.substring(0, space).trim();
+         rest = rest.substring(space);
+         // The value runs to the next '@', or to the end of the line for the last tag
+         int nextAt = rest.indexOf("@");
+         if (nextAt < 0) {
+             result.put(tag, rest.trim());
+             return result;
+         }
+         result.put(tag, rest.substring(0, nextAt).trim());
+         // Continue with the text from the next '@' onwards
+         rest = rest.substring(nextAt);
+     }
+     return result;
+ }
+
+ // Checks an array of strings to see if it contains a particular string
+/* private static boolean arrayContains(String[] array, String match) {
+ for (int i = 0; i < array.length; i++) {
+ if (array[i].equals(match)) return true;
+ }
+ return false;
+ }*/
+
+ // Prints the usage of the program when called from main
+/* private static void printUsage() {
+ String usage = new String();
+ usage += "Usage:\n\njava com.ibm.almaden.TempusFugit.Tools.RBManager fileName ((-r | -d) encoding?)?";
+ usage += "\n\n fileName -> The file (and path?) representing the main NLS resource\n\t\t(i.e. TempusFugit.resources)\n";
+ usage += " encoding -> Returns results for only the language encoding specified\n";
+ usage += " flag -r -> Gives only a status report on the state of the translations\n";
+ System.out.println(usage);
+ }*/
+
+}
+
diff --git a/unicodetools/com/ibm/rbm/RBPropertiesExporter.java b/unicodetools/com/ibm/rbm/RBPropertiesExporter.java
new file mode 100644
index 0000000..b7df7cb
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/RBPropertiesExporter.java
@@ -0,0 +1,81 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+import java.io.*;
+import javax.swing.*;
+import java.util.*;
+
+/**
+ * This class provides a plug-in exporter utility for RBManager that outputs Java
+ * standard .properties files according to the file structure of Resource
+ * Bundles. Most all meta-data is lost in this export.
+ *
+ * @author Jared Jackson
+ * @see com.ibm.rbm.RBManager
+ */
+public class RBPropertiesExporter extends RBExporter {
+    /** Creates the exporter and, if no file chooser exists yet, one that shows base class .properties files. */
+    public RBPropertiesExporter() {
+        super();
+        // Initialize the file chooser if necessary
+        if (chooser == null) {
+            chooser = new JFileChooser();
+            chooser.setFileFilter(new javax.swing.filechooser.FileFilter(){
+                public String getDescription() {
+                    return "Base Class Properties Files";
+                }
+                public boolean accept(File f) {
+                    if (f.isDirectory()) return true;
+                    String name = f.getName();
+                    if (name.toLowerCase().endsWith(".properties") && f.getName().indexOf("_") < 0) return true;
+                    return false;
+                }
+            });
+        } // end if
+    }
+
+    /**
+     * Prompts for a target with the save dialog and writes one .properties file per bundle of
+     * <CODE>rbm</CODE> next to the chosen file. Does nothing when rbm is null or the dialog is cancelled.
+     */
+    public void export(RBManager rbm) throws IOException {
+        if (rbm == null) return;
+        int ret_val = chooser.showSaveDialog(null);
+        if (ret_val != JFileChooser.APPROVE_OPTION) return;
+        File file = chooser.getSelectedFile();                         // The file(s) we will be working with
+        File directory = file.getAbsoluteFile().getParentFile();       // Never fed a null parent path
+        String base_name = file.getName();                             // The base name of the files we will write
+        if (base_name == null || base_name.equals("")) base_name = rbm.getBaseClass();
+        if (base_name.toLowerCase().endsWith(".properties")) base_name = base_name.substring(0,base_name.length()-11);
+        Vector bundle_v = rbm.getBundles();
+        for (int i=0; i < bundle_v.size(); i++) {
+            Properties prop = new Properties();
+            Bundle bundle = (Bundle)bundle_v.elementAt(i);
+            String base_enc = base_name;
+            if (bundle.encoding != null && !bundle.encoding.equals("")) base_enc = base_enc + "_" + bundle.encoding;
+            String file_name = base_enc + ".properties";
+            String header = "Resource Bundle: " + file_name + " - File automatically generated by RBManager at " + (new Date());
+            Vector group_v = bundle.getGroupsAsVector();
+            for (int j=0; j < group_v.size(); j++) {
+                BundleGroup group = (BundleGroup)group_v.elementAt(j);
+                Vector item_v = group.getItemsAsVector();
+                for (int k=0; k < item_v.size(); k++) {
+                    BundleItem item = (BundleItem)item_v.elementAt(k);
+                    prop.setProperty(item.getKey(), item.getTranslation());
+                } // end for - k
+            } // end for - j
+            // Write out the file; the stream is closed even when store() fails
+            FileOutputStream fos = new FileOutputStream(new File(directory, file_name));
+            try {
+                prop.store(fos, header);
+            } finally {
+                fos.close();
+            }
+        } // end for - i
+    }
+}
\ No newline at end of file
diff --git a/unicodetools/com/ibm/rbm/RBPropertiesImporter.java b/unicodetools/com/ibm/rbm/RBPropertiesImporter.java
new file mode 100644
index 0000000..017d68c
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/RBPropertiesImporter.java
@@ -0,0 +1,120 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+import java.io.*;
+
+import com.ibm.rbm.gui.RBManagerGUI;
+
+import java.util.*;
+
+/**
+ * This importer plug-in reads Java .properties files, either generated by
+ * RBManager or plain ones, and imports their resources.
+ *
+ * @author Jared Jackson
+ * @see com.ibm.rbm.RBManager
+ */
+public class RBPropertiesImporter extends RBImporter {
+    boolean isRBMFile = true; // Set by beginImport(): does the chosen file look like RBManager output?
+    /** Constructs the importer given the parent data classes and a Dialog title. */
+    public RBPropertiesImporter(String title, RBManager rbm, RBManagerGUI gui) {
+        super(title, rbm, gui);
+    }
+
+    /** Limits the chooser to directories and to .properties files whose name contains no '_'. */
+    protected void setupFileChooser() {
+        chooser.setFileFilter(new javax.swing.filechooser.FileFilter(){
+            public boolean accept(File f) {
+                if (f.isDirectory()) return true;
+                if (f.getName().toLowerCase().endsWith(".properties") && f.getName().indexOf("_") < 0) return true;
+                return false;
+            }
+            public String getDescription() {
+                return Resources.getTranslation("import_properties_file_description");
+            }
+        });
+    }
+
+    /**
+     * Imports the chosen file: through RBManager when its first non-blank line starts with "# @file",
+     * otherwise as plain .properties files, one per locale file found next to it.
+     */
+    protected void beginImport() throws IOException {
+        super.beginImport();
+        File baseFile = getChosenFile();
+        // Test if this is an RBManager generated file or not; only the first non-blank line decides
+        isRBMFile = true;
+        BufferedReader br = new BufferedReader(new FileReader(baseFile));
+        try {
+            String line;
+            while ((line = br.readLine()) != null) {
+                if (line.trim().equals("")) continue;
+                isRBMFile = line.startsWith("# @file");
+                break;
+            }
+        } finally {
+            br.close();
+        }
+        if (isRBMFile) {
+            // Treat the file as generated by RBManager: parse the resource bundle through RBManager
+            RBManager import_rbm = new RBManager(baseFile);
+            // Merge the two resource bundles
+            Vector bundles = import_rbm.getBundles();
+            Vector encodings = new Vector();
+            for (int i=0; i < bundles.size(); i++) {
+                Bundle b = (Bundle)bundles.elementAt(i);
+                encodings.addElement(b.encoding);
+            }
+            resolveEncodings(encodings);
+            for (int i=0; i < bundles.size(); i++) {
+                Bundle b = (Bundle)bundles.elementAt(i);
+                Enumeration keys = b.allItems.keys();
+                while (keys.hasMoreElements()) {
+                    String key = (String)keys.nextElement();
+                    BundleItem item = (BundleItem)b.allItems.get(key);
+                    importResource(item, b.encoding, (item.getParentGroup() == null ? getDefaultGroup(): item.getParentGroup().getName()));
+                }
+            }
+        } else {
+            // Just treat it as a regular properties file: check if there are any missing target locale files
+            String baseName = baseFile.getName().substring(0,baseFile.getName().length()-11); // |'.properties'| == 11
+            File baseDir = new File(baseFile.getParent());
+            String allChildren[] = baseDir.list();
+            Vector children_v = new Vector();
+            for (int i=0; i < allChildren.length; i++) {
+                String child = allChildren[i];
+                if (!child.startsWith(baseName) || !child.toLowerCase().endsWith(".properties") || child.length() < baseName.length() + 11) continue;
+                String middle = child.substring(baseName.length(), child.length() - 11); // cut by length: the extension's case may differ
+                if (middle.equals("")) children_v.addElement("");
+                else if (middle.startsWith("_")) children_v.addElement(middle.substring(1)); // only "<base>_<encoding>" names are locale files
+            }
+            showProgressBar(children_v.size());
+            resolveEncodings(children_v);
+            // Run through each source locale file importing as necessary
+            for (int i=0; i < children_v.size(); i++) {
+                Properties p = new Properties();
+                String enc = children_v.elementAt(i).toString();
+                FileInputStream fis = new FileInputStream(new File(baseDir, baseName + (enc.equals("") ? "" : "_" + enc) + ".properties"));
+                try {
+                    p.load(fis);
+                } finally {
+                    fis.close();
+                }
+                Enumeration keys = p.keys();
+                while (keys.hasMoreElements()) {
+                    String key = (String)keys.nextElement();
+                    BundleItem item = new BundleItem(null, key, p.getProperty(key));
+                    item.setTranslated(this.getDefaultTranslated());
+                    importResource(item, enc, getDefaultGroup());
+                }
+                incrementProgressBar();
+            }
+            hideProgressBar();
+        }
+    }
+}
\ No newline at end of file
diff --git a/unicodetools/com/ibm/rbm/RBReporter.bat b/unicodetools/com/ibm/rbm/RBReporter.bat
new file mode 100755
index 0000000..eb8fb33
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/RBReporter.bat
@@ -0,0 +1,2 @@
+@echo off
+java -classpath RBManager.jar;lib\xerces.jar;. com.ibm.rbm.RBReporter %1
\ No newline at end of file
diff --git a/unicodetools/com/ibm/rbm/RBReporter.java b/unicodetools/com/ibm/rbm/RBReporter.java
new file mode 100644
index 0000000..fd919c0
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/RBReporter.java
@@ -0,0 +1,1191 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+import javax.swing.*;
+import java.awt.*;
+import java.awt.event.*;
+import java.io.*;
+import java.util.*;
+
+import org.apache.xerces.dom.DocumentImpl;
+import org.apache.xml.serialize.*;
+import org.w3c.dom.*;
+
+import com.ibm.rbm.gui.RBManagerGUI;
+
+/**
+ * RBReporter is a fully functional application that runs separate from RBManager.
+ * The reporter produces statistically based reports on specified resource bundles,
+ * and it allows the user to set time intervals at which those reports will be
+ * generated. For more information on command line arguments and usage see the
+ * comments for the main() method.
+ *
+ * @author Jared Jackson
+ * @see com.ibm.rbm.RBManager
+ */
+public class RBReporter extends JFrame {
+
+ // ** COMPONENTS **
+ JLabel statusLabel; // Indicates if the reporter is running
+ JButton statusButton; // Button for toggling the reporter on/off
+ JLabel nextReportLabel; // Indicates date/time of next report
+ JLabel lastReportLabel; // Indicates date/time of last report
+ JTextField bundleField; // Indicates input base class file
+ JTextField directoryField; // Indicates output directory
+ JCheckBox textCheck; // Is text report generated?
+ JCheckBox htmlCheck; // Is HTML report generated?
+ JCheckBox xmlCheck; // Is XML report generated?
+ JCheckBox scanCheck; // Is code scan performed?
+ JTextField textField; // Text report file name
+ JTextField htmlField; // HTML report file name
+ JTextField xmlField; // XML report file name
+ JTextField scanField; // XML scanner file location
+ JComboBox textCombo; // Text report detail level
+ JComboBox htmlCombo; // HTML report detail level
+ JComboBox xmlCombo; // XML report detail level
+ JRadioButton sequentialRadio; // Report at sequential interval?
+ JRadioButton definedRadio; // Report at defined time?
+ JComboBox valueCombo; // Number of units to wait between reports
+ JComboBox unitCombo; // Units of time
+ JComboBox hourCombo; // Defined time to report -- hours
+ JComboBox minuteCombo; // Defined time to report -- minutes
+ JComboBox dayCombo; // Defined time to report -- day
+
+ // ** File Chooser **
+ JFileChooser bundleFileChooser = new JFileChooser();
+ JFileChooser directoryFileChooser = new JFileChooser();
+ JFileChooser scanFileChooser = new JFileChooser();
+
+ // ** DATA **
+ Date lastReport = null;
+ Date nextReport = null;
+ boolean running = false;
+ /** For generating a report */
+ RBManager rbm;
+
+ /** Applies the stored look and feel and locale, builds the UI, and starts a thread that polls once a second for due reports. */
+ private RBReporter(boolean makeVisible) {
+     try {
+         // Get the look and feel from preferences
+         try {
+             String laf = Preferences.getPreference("lookandfeel");
+             if (!laf.equals("")) UIManager.setLookAndFeel(laf);
+         }
+         catch (Exception e) {
+             // Best effort: on any failure the current look and feel simply stays in place
+         }
+         // Get the locale from preferences
+         if (!Preferences.getPreference("locale").equals("")) {
+             String localeStr = Preferences.getPreference("locale");
+             String language = Resources.getLanguage(localeStr);
+             String country = Resources.getCountry(localeStr);
+             String variant = Resources.getVariant(localeStr);
+             if (language == null || language.equals("") || language.length() > 3) language = "en";
+             if (country == null) country = new String();
+             if (variant == null) Resources.setLocale(new Locale(language, country));
+             else Resources.setLocale(new Locale(language, country, variant));
+         }
+         Resources.initBundle();
+     }
+     catch (Exception e) {
+         e.printStackTrace();
+     }
+     initComponents();
+     setVisible(makeVisible);
+     Thread reportThread = new Thread(){
+         public void run() {
+             if (nextReport != null && (nextReport.compareTo(new Date()) <= 0)) {
+                 // A failed catch-up report must not stop the thread, but it should leave a trace
+                 try { generateReports(); } catch (IOException ioe) { ioe.printStackTrace(System.err); }
+             }
+             if (nextReport == null) nextReport = generateNextReportDate();
+             updateStatusComponents();
+             updateDateFields();
+             while (true) {
+                 if (running && (nextReport.compareTo(new Date()) < 0)) {
+                     try {generateReports();}
+                     catch (IOException ioe) {
+                         JOptionPane.showMessageDialog(null, ioe.getMessage(),
+                                 Resources.getTranslation("error"), JOptionPane.ERROR_MESSAGE);
+                     }
+                 }
+                 try {
+                     sleep(1000);
+                 } catch (Exception e) {
+                     e.printStackTrace(System.err);
+                 }
+             }
+         }
+     };
+     reportThread.start();
+ }
+
+ // Called when a report should be generated. Does not check if it should be generated
+ private void generateReports() throws IOException {
+ File baseFile = new File(bundleField.getText());
+ if (baseFile == null || !baseFile.isFile())
+ throw new IOException("Specified input file is unusable");
+ File directory = new File(directoryField.getText());
+ rbm = new RBManager(baseFile);
+
+ if (rbm == null)
+ throw new IOException("Unable to load the resource bundle file");
+ if (directory == null || !directory.isDirectory())
+ throw new IOException("Specified output directory is unusable");
+ RBReporterScanner scanner = null;
+ if (scanCheck.isSelected()) {
+ scanner = new RBReporterScanner((Bundle)rbm.getBundles().elementAt(0),
+ new File(scanField.getText()));
+ scanner.performScan();
+ }
+ if (textCheck.isSelected()) {
+ File textFile = new File(directory, textField.getText());
+ String textReport = getAllLanguageReports(textCombo.getSelectedIndex() == 0);
+ if (scanCheck.isSelected()) {
+ // Add file scan information
+ StringBuffer buffer = new StringBuffer();
+ buffer.append("\n\nCode Scan Results:\n\n");
+ buffer.append("\n\tNumber of unique resources found: " + scanner.getNumberResourcesFound());
+ buffer.append("\n\tNumber of resources missing from bundle: " + scanner.getNumberMissingResources());
+ // Missing resources from the bundle
+ buffer.append("\n\tMissing Resources: ");
+ Vector v = scanner.getMissingResources();
+ for (int i=0; i < v.size(); i++) {
+ ScanResult result = (ScanResult)v.elementAt(i);
+ if (textCombo.getSelectedIndex() == 0) {
+ buffer.append("\n\t\t" + result.getName() + " (" + result.getOccurances().size() + " Occurances)");
+ buffer.append("\n\t\t\t" + result.getOccurances());
+ } else {
+ buffer.append((i==0 ? "" : ", ") + result.getName() + " (" + result.getOccurances().size() + " Occurances)");
+ }
+ }
+ // Bundle resources not found in the code
+ buffer.append("\n\tNumber of potentially unused resources in bundle: " + scanner.getNumberUnusedResources());
+ v = scanner.getUnusedResources();
+ for (int i=0; i < v.size(); i++) {
+ ScanResult result = (ScanResult)v.elementAt(i);
+ if (textCombo.getSelectedIndex() == 0) {
+ buffer.append("\n\t\t" + result.getName() + " (Group: " + result.getGroupName() + ")");
+ } else {
+ buffer.append((i==0 ? "" : ", ") + result.getName());
+ }
+ }
+
+ textReport = textReport + buffer.toString();
+ }
+ FileWriter fw = new FileWriter(textFile);
+ fw.write(textReport);
+ fw.flush();
+ fw.close();
+ }
+ if (htmlCheck.isSelected()) {
+ File htmlFile = new File(directory, htmlField.getText());
+ Document htmlReport = getHTMLReportz(htmlCombo.getSelectedIndex() == 0);
+ if (scanCheck.isSelected()) {
+ // Add file scan information
+ Element html_elem = htmlReport.getDocumentElement();
+ NodeList nl = html_elem.getElementsByTagName("BODY");
+ Element body_elem = (Element)nl.item(0);
+ Element h2_elem = htmlReport.createElement("H2");
+ Text h2_text = htmlReport.createTextNode("Code Scan Results");
+ Element block_elem = htmlReport.createElement("BLOCKQUOTE");
+ Element p1_elem = htmlReport.createElement("P");
+ Element p2_elem = htmlReport.createElement("P");
+ Element p3_elem = htmlReport.createElement("P");
+ Text p1_text = htmlReport.createTextNode("Number of unique resources found: " +
+ scanner.getNumberMissingResources());
+ Text p2_text = htmlReport.createTextNode("Number of resources missing from bundle: " +
+ scanner.getNumberMissingResources());
+ Text p3_text = htmlReport.createTextNode("Number of potentially unused resources in bundle: " +
+ scanner.getNumberUnusedResources());
+
+ h2_elem.appendChild(h2_text);
+ p1_elem.appendChild(p1_text);
+ p2_elem.appendChild(p2_text);
+ p3_elem.appendChild(p3_text);
+ block_elem.appendChild(p1_elem);
+ block_elem.appendChild(p2_elem);
+ block_elem.appendChild(p3_elem);
+ body_elem.appendChild(h2_elem);
+ body_elem.appendChild(block_elem);
+
+ // Missing resources from the bundle
+ Text missing_text = null;
+ Vector v = scanner.getMissingResources();
+ if (htmlCombo.getSelectedIndex() == 0) {
+ Element ul_elem = htmlReport.createElement("UL");
+ missing_text = htmlReport.createTextNode("Missing Resources:");
+ ul_elem.appendChild(missing_text);
+ for (int i=0; i < v.size(); i++) {
+ ScanResult result = (ScanResult)v.elementAt(i);
+ Element li_elem = htmlReport.createElement("LI");
+ Element br_elem = htmlReport.createElement("BR");
+ Text t1_text = htmlReport.createTextNode(result.getName() + " (" +
+ result.getOccurances().size() + " Occurances)");
+ Text t2_text = htmlReport.createTextNode(result.getOccurances().toString());
+ li_elem.appendChild(t1_text);
+ li_elem.appendChild(br_elem);
+ li_elem.appendChild(t2_text);
+ ul_elem.appendChild(li_elem);
+ }
+ p2_elem.appendChild(ul_elem);
+ } else {
+ StringBuffer buffer = new StringBuffer();
+ buffer.append("Missing Resources: ");
+ for (int i=0; i < v.size(); i++) {
+ ScanResult result = (ScanResult)v.elementAt(i);
+ buffer.append((i==0 ? "" : ", ") + result.getName() + " (" + result.getOccurances().size() + " Occurances)");
+ }
+ missing_text = htmlReport.createTextNode(buffer.toString());
+ Element br_elem = htmlReport.createElement("BR");
+ p2_elem.appendChild(br_elem);
+ p2_elem.appendChild(missing_text);
+ }
+ // Bundle resources not found in the code
+ Text unused_text = null;
+ v = scanner.getUnusedResources();
+ if (htmlCombo.getSelectedIndex() == 0) {
+ Element ul_elem = htmlReport.createElement("UL");
+ unused_text = htmlReport.createTextNode("Unused Resources:");
+ ul_elem.appendChild(unused_text);
+ for (int i=0; i < v.size(); i++) {
+ ScanResult result = (ScanResult)v.elementAt(i);
+ Element li_elem = htmlReport.createElement("LI");
+ Text t1_text = htmlReport.createTextNode(result.getName() + " (Group: " +
+ result.getGroupName() + ")");
+ li_elem.appendChild(t1_text);
+ ul_elem.appendChild(li_elem);
+ }
+ p3_elem.appendChild(ul_elem);
+ } else {
+ StringBuffer buffer = new StringBuffer();
+ buffer.append("Unused Resources: ");
+ for (int i=0; i < v.size(); i++) {
+ ScanResult result = (ScanResult)v.elementAt(i);
+ buffer.append((i==0 ? "" : ", ") + result.getName());
+ }
+ unused_text = htmlReport.createTextNode(buffer.toString());
+ Element br_elem = htmlReport.createElement("BR");
+ p3_elem.appendChild(br_elem);
+ p3_elem.appendChild(unused_text);
+ }
+ }
+ FileWriter fw = new FileWriter(htmlFile);
+ OutputFormat of = new OutputFormat(htmlReport);
+ of.setIndenting(true);
+ of.setEncoding("ISO-8859-1");
+ HTMLSerializer serializer = new HTMLSerializer(fw, of);
+ serializer.serialize(htmlReport);
+ }
+ if (xmlCheck.isSelected()) {
+ File xmlFile = new File(directory, xmlField.getText());
+ Document xmlReport = getXMLReportz(xmlCombo.getSelectedIndex() == 0);
+ if (scanCheck.isSelected()) {
+ // Add file scan information
+ Element root = xmlReport.getDocumentElement();
+ Element code_scan_elem = xmlReport.createElement("CODE_SCAN");
+ Element unique_elem = xmlReport.createElement("UNIQUE_RESOURCES");
+ Element missing_elem = xmlReport.createElement("MISSING_RESOURCES");
+ Element unused_elem = xmlReport.createElement("UNUSED_RESOURCES");
+ Element unique_total_elem = xmlReport.createElement("TOTAL");
+ Element missing_total_elem = xmlReport.createElement("TOTAL");
+ Element unused_total_elem = xmlReport.createElement("TOTAL");
+ Text unique_total_text = xmlReport.createTextNode(String.valueOf(scanner.getNumberMissingResources()));
+ Text missing_total_text = xmlReport.createTextNode(String.valueOf(scanner.getNumberMissingResources()));
+ Text unused_total_text = xmlReport.createTextNode(String.valueOf(scanner.getNumberUnusedResources()));
+
+ unique_total_elem.appendChild(unique_total_text);
+ missing_total_elem.appendChild(missing_total_text);
+ unused_total_elem.appendChild(unused_total_text);
+ unique_elem.appendChild(unique_total_elem);
+ missing_elem.appendChild(missing_total_elem);
+ unused_elem.appendChild(unused_total_elem);
+ code_scan_elem.appendChild(unique_elem);
+ code_scan_elem.appendChild(missing_elem);
+ code_scan_elem.appendChild(unused_elem);
+ root.appendChild(code_scan_elem);
+ // Missing resources from the bundle
+ Vector v = scanner.getMissingResources();
+ for (int i=0; i < v.size(); i++) {
+ ScanResult result = (ScanResult)v.elementAt(i);
+ Element item_elem = xmlReport.createElement("RESOURCE");
+ item_elem.setAttribute("NAME",result.getName());
+ if (xmlCombo.getSelectedIndex() == 0) {
+ Vector occ_v = result.getOccurances();
+ for (int j=0; j < occ_v.size(); j++) {
+ Occurance occ = (Occurance)occ_v.elementAt(j);
+ Element occ_elem = xmlReport.createElement("OCCURANCE");
+ occ_elem.setAttribute("FILE_NAME", occ.getFileName());
+ occ_elem.setAttribute("FILE_PATH", occ.getFilePath());
+ occ_elem.setAttribute("LINE_NUMBER", String.valueOf(occ.getLineNumber()));
+ item_elem.appendChild(occ_elem);
+ }
+ }
+ missing_elem.appendChild(item_elem);
+ }
+ // Bundle resources not found in the code
+ v = scanner.getUnusedResources();
+ for (int i=0; i < v.size(); i++) {
+ ScanResult result = (ScanResult)v.elementAt(i);
+ Element item_elem = xmlReport.createElement("RESOURCE");
+ item_elem.setAttribute("NAME",result.getName());
+ item_elem.setAttribute("GROUP",result.getGroupName());
+ unused_elem.appendChild(item_elem);
+ }
+ }
+ FileWriter fw = new FileWriter(xmlFile);
+ OutputFormat of = new OutputFormat(xmlReport);
+ of.setIndenting(true);
+ of.setEncoding("ISO-8859-1");
+ XMLSerializer serializer = new XMLSerializer(fw, of);
+ serializer.serialize(xmlReport);
+ }
+
+ lastReport = new Date();
+ nextReport = generateNextReportDate();
+ updateDateFields();
+ if (!isVisible()) {
+ System.out.println("RBReporter: Generated report at " + lastReport.toString());
+ System.out.println("RBReporter: Next report at " + nextReport.toString());
+ }
+ }
+
+ /**
+  * Computes when the next report is due, assuming a report was generated just now.
+  * <p>
+  * In sequential mode the selected amount of minutes, hours or days is added to the
+  * current time. In defined mode the next occurrence of the selected hour and minute
+  * is used, optionally advanced to the selected day of the week (index 0 of the day
+  * combo means no weekday restriction).
+  *
+  * @return the next report time, or null when neither interval mode is selected
+  */
+ private Date generateNextReportDate() {
+     Date retDate = null;
+     GregorianCalendar now = new GregorianCalendar();
+     if (sequentialRadio.isSelected()) {
+         int value = Integer.parseInt(valueCombo.getSelectedItem().toString());
+         int unit = unitCombo.getSelectedIndex();
+         // Calendar.HOUR is fine for add(): it simply advances the time by whole hours
+         if (unit == 0) now.add(Calendar.MINUTE, value);
+         else if (unit == 1) now.add(Calendar.HOUR, value);
+         else if (unit == 2) now.add(Calendar.DATE, value);
+         retDate = now.getTime();
+     } else if (definedRadio.isSelected()) {
+         int hour = Integer.parseInt(hourCombo.getSelectedItem().toString());
+         int minute = Integer.parseInt(minuteCombo.getSelectedItem().toString());
+         int day = dayCombo.getSelectedIndex();
+
+         GregorianCalendar then = new GregorianCalendar();
+         // The hour is a 24-hour clock value, so it must go into HOUR_OF_DAY.
+         // Setting the 12-hour HOUR field to 13..23 rolls over into the wrong half-day.
+         then.set(Calendar.HOUR_OF_DAY, hour);
+         then.set(Calendar.MINUTE, minute);
+         then.set(Calendar.SECOND, 0);
+         then.set(Calendar.MILLISECOND, 0);
+
+         // If that time has already passed today, start looking from tomorrow
+         if (then.getTime().compareTo(now.getTime()) <= 0) then.add(Calendar.DATE, 1);
+         if (day > 0 && day <= 7) {
+             // Combo indices 1..7 stand for Monday..Sunday
+             int[] weekDays = {Calendar.MONDAY, Calendar.TUESDAY, Calendar.WEDNESDAY, Calendar.THURSDAY,
+                               Calendar.FRIDAY, Calendar.SATURDAY, Calendar.SUNDAY};
+             int wantedDay = weekDays[day - 1];
+             // Step forward one day at a time until we land on the requested weekday
+             while (then.get(Calendar.DAY_OF_WEEK) != wantedDay) {
+                 then.add(Calendar.DATE, 1);
+             }
+         }
+         retDate = then.getTime();
+     }
+     // String concatenation copes with a null date; calling toString() on it would not
+     RBManagerGUI.debugMsg("Next Date: " + retDate);
+     return retDate;
+ }
+
+ /**
+  * Builds the plain text report covering every bundle held by the manager.
+  *
+  * @param detailed forwarded to the per-language report; when true each section
+  *                 also lists its untranslated keys
+  * @return the report header followed by one section per bundle
+  */
+ public String getAllLanguageReports(boolean detailed) {
+     StringBuffer report = new StringBuffer();
+     report.append("Resource Bundle Report: ").append(rbm.getBaseClass());
+     report.append("\nReport Generated: ").append(new Date().toString()).append("\n\n");
+
+     Enumeration allBundles = rbm.getBundles().elements();
+     while (allBundles.hasMoreElements()) {
+         Bundle current = (Bundle)allBundles.nextElement();
+         report.append(getLanguageReport(detailed, current));
+     }
+     return report.toString();
+ }
+
+ /**
+  * Builds the plain text report section for a single bundle.
+  *
+  * @param detailed when true the keys of all untranslated items are appended
+  * @param dict the bundle to report on; a null bundle yields an empty string
+  * @return the text section for the bundle
+  */
+ private String getLanguageReport(boolean detailed, Bundle dict) {
+     if (dict == null) return "";
+     StringBuffer report = new StringBuffer();
+     report.append("\nLanguage: " + (dict.language == null ? dict.encoding : dict.language));
+     report.append(dict.country == null ? "" : " - Country: " + dict.country);
+     report.append(dict.variant == null ? "" : " - Variant: " + dict.variant);
+     report.append("\n");
+     report.append(" Number of NLS items in the file: " + dict.allItems.size() + "\n");
+
+     int untranslated = 0;
+     StringBuffer untransKeys = new StringBuffer();
+     // Named "items" rather than "enum", which is a reserved word from Java 5 on
+     Enumeration items = dict.allItems.elements();
+     while (items.hasMoreElements()) {
+         BundleItem tempItem = (BundleItem)items.nextElement();
+         if (tempItem.isTranslated()) continue;
+         untranslated++;
+         untransKeys.append(" " + tempItem.getKey());
+     }
+     report.append(" Number of NLS items not translated: " + untranslated);
+     if (detailed) {
+         report.append("\n Untranslated NLS keys: " + untransKeys.toString());
+     }
+
+     return report.toString();
+ }
+
+ /**
+  * Builds a DOM document holding an HTML report on the currently opened resource bundle.
+  * The document has a HEAD with the title, and a BODY with two centered headings
+  * followed by one section per bundle.
+  *
+  * @param detailed forwarded to the per-language section builder; when true each
+  *                 section lists its untranslated keys
+  * @return the populated document
+  */
+ public Document getHTMLReportz(boolean detailed) {
+     Document html = new DocumentImpl();
+     Element root = html.createElement("HTML");
+     html.appendChild(root);
+
+     // HEAD / TITLE
+     Element head_elem = html.createElement("HEAD");
+     Element title_elem = html.createElement("TITLE");
+     Text title_text = html.createTextNode("Resource Bundle Report - " + rbm.getBaseClass());
+     title_elem.appendChild(title_text);
+     head_elem.appendChild(title_elem);
+
+     // BODY headings
+     Element body_elem = html.createElement("BODY");
+     Element center1_elem = html.createElement("CENTER");
+     Element h1_elem = html.createElement("H1");
+     Element center2_elem = html.createElement("CENTER");
+     // The generation time is a sub-heading: H3, matching the string-based getHTMLReport()
+     Element h3_elem = html.createElement("H3");
+     Text title1_text = html.createTextNode("Resource Bundle Report: " + rbm.getBaseClass());
+     Text title2_text = html.createTextNode("Report Generated: " + (new Date()).toString());
+     h1_elem.appendChild(title1_text);
+     h3_elem.appendChild(title2_text);
+     center1_elem.appendChild(h1_elem);
+     center2_elem.appendChild(h3_elem);
+     body_elem.appendChild(center1_elem);
+     body_elem.appendChild(center2_elem);
+
+     root.appendChild(head_elem);
+     root.appendChild(body_elem);
+
+     // One section per bundle, appended directly to the body
+     Vector bundles = rbm.getBundles();
+     for (int i=0; i < bundles.size(); i++) {
+         getHTMLLanguageReportz(html, body_elem, detailed, (Bundle)bundles.elementAt(i));
+     }
+
+     return html;
+ }
+
+ /**
+  * Produces the HTML report on the currently opened resource bundle as a String.
+  *
+  * @param detailed forwarded to the per-language section builder; when true each
+  *                 section lists its untranslated keys
+  * @return the complete HTML page
+  */
+ public String getHTMLReport(boolean detailed) {
+     StringBuffer page = new StringBuffer();
+
+     // Page header and the two centered headings
+     page.append("<HTML>\n<HEAD><TITLE>Resource Bundle Report - ");
+     page.append(rbm.getBaseClass());
+     page.append("</TITLE></HEAD>\n<BODY>\n");
+     page.append("<CENTER><H1>Resource Bundle Report: ");
+     page.append(rbm.getBaseClass());
+     page.append("</H1></CENTER>\n");
+     page.append("<CENTER><H3>Report Generated: ");
+     page.append(new Date().toString());
+     page.append("</H3></CENTER>\n");
+
+     // One section per bundle
+     Enumeration allBundles = rbm.getBundles().elements();
+     while (allBundles.hasMoreElements()) {
+         Bundle current = (Bundle)allBundles.nextElement();
+         page.append(getHTMLLanguageReport(detailed, current));
+     }
+
+     page.append("</BODY>\n</HTML>");
+     return page.toString();
+ }
+
+ /**
+  * Appends the HTML section for one bundle to the given body element: an H2 heading
+  * naming the locale, then a BLOCKQUOTE with the item total, the untranslated total
+  * and, when detailed, a list of the untranslated keys.
+  *
+  * @param html the document used to create the new nodes
+  * @param body_elem the element that receives the heading and the block
+  * @param detailed when true the untranslated keys are listed
+  * @param dict the bundle to report on
+  */
+ private void getHTMLLanguageReportz(Document html, Element body_elem, boolean detailed, Bundle dict) {
+     Element h2_elem = html.createElement("H2");
+     Text h2_text = html.createTextNode("Language: " + (dict.language == null ? dict.encoding : dict.language) +
+             (dict.country == null ? "" : " - Country: " + dict.country) +
+             (dict.variant == null ? "" : " - Variant: " + dict.variant));
+     Element block_elem = html.createElement("BLOCKQUOTE");
+     Element p_elem = html.createElement("P");
+     Text p_text = html.createTextNode("Number of NLS items in the file: " +
+             String.valueOf(dict.allItems.size()));
+     Element ul_elem = html.createElement("UL");
+     Text ul_text = html.createTextNode("Untranslated NLS keys:");
+
+     h2_elem.appendChild(h2_text);
+     p_elem.appendChild(p_text);
+     ul_elem.appendChild(ul_text);
+     block_elem.appendChild(p_elem);
+     body_elem.appendChild(h2_elem);
+     body_elem.appendChild(block_elem);
+
+     int untranslated = 0;
+     // Named "items" rather than "enum", which is a reserved word from Java 5 on
+     Enumeration items = dict.allItems.elements();
+     while (items.hasMoreElements()) {
+         BundleItem tempItem = (BundleItem)items.nextElement();
+         if (tempItem.isTranslated()) continue;
+         untranslated++;
+         if (detailed) {
+             Element li_elem = html.createElement("LI");
+             Text li_text = html.createTextNode(tempItem.getKey());
+             li_elem.appendChild(li_text);
+             ul_elem.appendChild(li_elem);
+         }
+     }
+     // The total can only be written once every item has been counted
+     Element p2_elem = html.createElement("P");
+     Text p2_text = html.createTextNode("Number of NLS items not translated: " +
+             String.valueOf(untranslated));
+     p2_elem.appendChild(p2_text);
+     block_elem.appendChild(p2_elem);
+     // The key list is attached only for detailed reports
+     if (detailed) block_elem.appendChild(ul_elem);
+ }
+
+ /**
+  * Builds the HTML fragment for one bundle: an H2 heading naming the locale, then a
+  * BLOCKQUOTE with the item total, the untranslated total and, when detailed, a list
+  * of the untranslated keys.
+  *
+  * @param detailed when true the untranslated keys are listed
+  * @param dict the bundle to report on
+  * @return the HTML fragment
+  */
+ private String getHTMLLanguageReport(boolean detailed, Bundle dict) {
+     StringBuffer buffer = new StringBuffer();
+     buffer.append("\n<H2>Language: " + (dict.language == null ? dict.encoding : dict.language));
+     buffer.append(dict.country == null ? "" : " - Country: " + dict.country);
+     buffer.append(dict.variant == null ? "" : " - Variant: " + dict.variant);
+     buffer.append("</H2>\n");
+     buffer.append("<BLOCKQUOTE>\n");
+
+     buffer.append("<P>Number of NLS items in the file: " + String.valueOf(dict.allItems.size()) + "</P>\n");
+     int untranslated = 0;
+     // Named "items" rather than "enum", which is a reserved word from Java 5 on
+     Enumeration items = dict.allItems.elements();
+     StringBuffer innerBuffer = new StringBuffer();
+     while (items.hasMoreElements()) {
+         BundleItem tempItem = (BundleItem)items.nextElement();
+         if (tempItem.isTranslated()) continue;
+         untranslated++;
+         // The key list is only emitted for detailed reports, so only build it then
+         if (detailed) innerBuffer.append("<LI>" + tempItem.getKey() + "</LI>\n");
+     }
+     buffer.append("<P>Number of NLS items not translated: " + String.valueOf(untranslated) + "</P>\n");
+     if (detailed) {
+         buffer.append("<UL>Untranslated NLS keys:\n");
+         buffer.append(innerBuffer.toString());
+         buffer.append("</UL>\n");
+     }
+
+     buffer.append("</BLOCKQUOTE>\n");
+     return buffer.toString();
+ }
+
+ /**
+  * Builds a DOM document holding an XML report on the currently open resource bundle.
+  * The REPORT root carries the base class and generation date as attributes and has
+  * one child per bundle.
+  *
+  * @param detailed forwarded to the per-language element builder
+  * @return the populated document
+  */
+ public Document getXMLReportz(boolean detailed) {
+     Document doc = new DocumentImpl();
+     Element reportRoot = doc.createElement("REPORT");
+     reportRoot.setAttribute("BASECLASS", rbm.getBaseClass());
+     reportRoot.setAttribute("DATE", new Date().toString());
+     doc.appendChild(reportRoot);
+
+     Enumeration allBundles = rbm.getBundles().elements();
+     while (allBundles.hasMoreElements()) {
+         Bundle current = (Bundle)allBundles.nextElement();
+         Element languageElem = getXMLLanguageReportz(doc, detailed, current);
+         reportRoot.appendChild(languageElem);
+     }
+     return doc;
+ }
+
+ /**
+  * Produces the XML report on the currently open resource bundle as a String.
+  *
+  * @param detailed forwarded to the per-language fragment builder
+  * @return the XML declaration followed by the REPORT element
+  */
+ public String getXMLReport(boolean detailed) {
+     StringBuffer out = new StringBuffer();
+     out.append("<?xml version=\"1.0\"?>\n");
+     out.append("<REPORT BASECLASS=\"");
+     out.append(rbm.getBaseClass());
+     out.append("\" DATE=\"");
+     out.append(new Date().toString());
+     out.append("\">\n");
+
+     Enumeration allBundles = rbm.getBundles().elements();
+     while (allBundles.hasMoreElements()) {
+         Bundle current = (Bundle)allBundles.nextElement();
+         out.append(getXMLLanguageReport(detailed, current));
+     }
+     out.append("</REPORT>");
+     return out.toString();
+ }
+
+ /**
+  * Builds the LANGUAGE_REPORT element for one bundle. It contains a LOCALE element,
+  * the NLS_TOTAL and UNTRANSLATED_TOTAL counts and, when detailed, an UNTRANSLATED
+  * element with one RESOURCEKEY child per untranslated item.
+  *
+  * @param xml the document used to create the new nodes
+  * @param detailed when true the untranslated keys are included
+  * @param dict the bundle to report on
+  * @return the new, not yet attached, LANGUAGE_REPORT element
+  */
+ private Element getXMLLanguageReportz(Document xml, boolean detailed, Bundle dict) {
+     Element lang_report_elem = xml.createElement("LANGUAGE_REPORT");
+     Element locale_elem = xml.createElement("LOCALE");
+     locale_elem.setAttribute("LANGUAGE", (dict.language == null ? dict.encoding : dict.language));
+     locale_elem.setAttribute("COUNTRY", (dict.country == null ? "" : dict.country));
+     locale_elem.setAttribute("VARIANT", (dict.variant == null ? "" : dict.variant));
+     Element nls_total_elem = xml.createElement("NLS_TOTAL");
+     Text nls_total_text = xml.createTextNode(String.valueOf(dict.allItems.size()));
+     Element untranslated_total_elem = xml.createElement("UNTRANSLATED_TOTAL");
+     Element untranslated_elem = xml.createElement("UNTRANSLATED");
+
+     nls_total_elem.appendChild(nls_total_text);
+     lang_report_elem.appendChild(locale_elem);
+     lang_report_elem.appendChild(nls_total_elem);
+     lang_report_elem.appendChild(untranslated_total_elem);
+     if (detailed) lang_report_elem.appendChild(untranslated_elem);
+
+     int untranslated = 0;
+     // Named "items" rather than "enum", which is a reserved word from Java 5 on
+     Enumeration items = dict.allItems.elements();
+     while (items.hasMoreElements()) {
+         BundleItem tempItem = (BundleItem)items.nextElement();
+         if (tempItem.isTranslated()) continue;
+         untranslated++;
+         // UNTRANSLATED is attached only for detailed reports, so skip building
+         // key nodes that would never be reachable from the result
+         if (detailed) {
+             Element resource_elem = xml.createElement("RESOURCEKEY");
+             Text resource_text = xml.createTextNode(tempItem.getKey());
+             resource_elem.appendChild(resource_text);
+             untranslated_elem.appendChild(resource_elem);
+         }
+     }
+     // The total can only be written once every item has been counted
+     Text untranslated_total_text = xml.createTextNode(String.valueOf(untranslated));
+     untranslated_total_elem.appendChild(untranslated_total_text);
+
+     return lang_report_elem;
+ }
+
+ /**
+  * Builds the LANGUAGE_REPORT fragment for one bundle as text: a LOCALE element, the
+  * NLS_TOTAL and UNTRANSLATED_TOTAL counts and, when detailed, an UNTRANSLATED element
+  * listing each untranslated key.
+  * <p>
+  * NOTE(review): values are written without XML escaping; a key or locale value
+  * containing markup characters would presumably yield malformed XML - confirm
+  * whether such values can occur.
+  *
+  * @param detailed when true the untranslated keys are included
+  * @param dict the bundle to report on
+  * @return the XML fragment
+  */
+ private String getXMLLanguageReport(boolean detailed, Bundle dict) {
+     StringBuffer buffer = new StringBuffer();
+     buffer.append("<LANGUAGE_REPORT>\n");
+
+     buffer.append("\n\t<LOCALE LANGUAGE=\"" + (dict.language == null ? dict.encoding : dict.language));
+     buffer.append("\" COUNTRY=\"" + (dict.country == null ? "" : dict.country));
+     buffer.append("\" VARIANT=\"" + (dict.variant == null ? "" : dict.variant) + "\"/>\n");
+
+     buffer.append("\t<NLS_TOTAL>" + String.valueOf(dict.allItems.size()) + "</NLS_TOTAL>\n");
+     int untranslated = 0;
+     // Named "items" rather than "enum", which is a reserved word from Java 5 on
+     Enumeration items = dict.allItems.elements();
+     StringBuffer innerBuffer = new StringBuffer();
+     while (items.hasMoreElements()) {
+         BundleItem tempItem = (BundleItem)items.nextElement();
+         if (tempItem.isTranslated()) continue;
+         untranslated++;
+         // The key list is only emitted for detailed reports, so only build it then
+         if (detailed) innerBuffer.append("\t\t<RESOURCEKEY>" + tempItem.getKey() + "</RESOURCEKEY>\n");
+     }
+     buffer.append("\t<UNTRANSLATED_TOTAL>" + String.valueOf(untranslated) + "</UNTRANSLATED_TOTAL>\n");
+     if (detailed) {
+         buffer.append("\t<UNTRANSLATED>\n");
+         buffer.append(innerBuffer.toString());
+         buffer.append("\t</UNTRANSLATED>\n");
+     }
+
+     buffer.append("</LANGUAGE_REPORT>\n");
+     return buffer.toString();
+ }
+
+ /**
+  * Refreshes the "next report" and "last report" labels from the stored dates,
+  * showing "--" for a date that has not been set.
+  */
+ private void updateDateFields() {
+     String nextText = (nextReport == null) ? "--" : nextReport.toString();
+     nextReportLabel.setText(Resources.getTranslation("reporter_next_report", nextText));
+
+     String lastText = (lastReport == null) ? "--" : lastReport.toString();
+     lastReportLabel.setText(Resources.getTranslation("reporter_last_report", lastText));
+ }
+
+ /**
+  * Makes the status label and the start/stop button reflect the running flag:
+  * a green "running" label with a stop button, or a red "stopped" label with a
+  * start button.
+  */
+ private void updateStatusComponents() {
+     String statusKey = running ? "reporter_status_running" : "reporter_status_stopped";
+     Color statusColor = running ? Color.green : Color.red;
+     String buttonKey = running ? "reporter_button_stop" : "reporter_button_start";
+
+     statusLabel.setText(Resources.getTranslation(statusKey));
+     statusLabel.setForeground(statusColor);
+     statusButton.setText(Resources.getTranslation(buttonKey));
+ }
+
+ /**
+  * Applies the saved reporter preferences to the GUI components. A preference that
+  * is missing (or, for file names and combo selections, empty) leaves the matching
+  * component untouched.
+  */
+ private void setComponentsToDefaults() {
+     // Running state: toggle only when the saved state disagrees with the current one.
+     // Constant-first equals() keeps a missing preference from raising an NPE.
+     String pref = Preferences.getPreference("reporter_enabled");
+     if ((running && "No".equals(pref)) || (!running && "Yes".equals(pref))) toggleStatus();
+
+     // Output format check boxes
+     pref = Preferences.getPreference("reporter_format_text_enabled");
+     if (pref != null) textCheck.setSelected(pref.equals("Yes"));
+     pref = Preferences.getPreference("reporter_format_html_enabled");
+     if (pref != null) htmlCheck.setSelected(pref.equals("Yes"));
+     pref = Preferences.getPreference("reporter_format_xml_enabled");
+     if (pref != null) xmlCheck.setSelected(pref.equals("Yes"));
+
+     // Output file names
+     pref = Preferences.getPreference("reporter_format_text_file");
+     if (pref != null && !pref.equals("")) textField.setText(pref);
+     pref = Preferences.getPreference("reporter_format_html_file");
+     if (pref != null && !pref.equals("")) htmlField.setText(pref);
+     pref = Preferences.getPreference("reporter_format_xml_file");
+     if (pref != null && !pref.equals("")) xmlField.setText(pref);
+
+     // Detail levels
+     pref = Preferences.getPreference("reporter_format_text_detail");
+     if (pref != null && !pref.equals("")) selectComboValue(textCombo, pref);
+     pref = Preferences.getPreference("reporter_format_html_detail");
+     if (pref != null && !pref.equals("")) selectComboValue(htmlCombo, pref);
+     pref = Preferences.getPreference("reporter_format_xml_detail");
+     if (pref != null && !pref.equals("")) selectComboValue(xmlCombo, pref);
+
+     // Interval mode: anything other than "Sequential" selects the defined mode;
+     // a missing preference leaves the radio buttons as they are
+     pref = Preferences.getPreference("reporter_interval");
+     if (pref != null) {
+         if (pref.equals("Sequential")) sequentialRadio.setSelected(true);
+         else definedRadio.setSelected(true);
+     }
+
+     // Sequential interval: amount and unit
+     pref = Preferences.getPreference("reporter_interval_sequential_value");
+     if (pref != null && !pref.equals("")) selectComboValue(valueCombo, pref);
+     pref = Preferences.getPreference("reporter_interval_sequential_units");
+     // The units belong in unitCombo (saveDefaults() reads them from there), not valueCombo
+     if (pref != null && !pref.equals("")) selectComboValue(unitCombo, pref);
+
+     // Defined interval: hour, day and minute
+     pref = Preferences.getPreference("reporter_interval_defined_hour");
+     if (pref != null && !pref.equals("")) selectComboValue(hourCombo, pref);
+     pref = Preferences.getPreference("reporter_interval_defined_day");
+     if (pref != null && !pref.equals("")) selectComboValue(dayCombo, pref);
+     pref = Preferences.getPreference("reporter_interval_defined_minute");
+     if (pref != null && !pref.equals("")) selectComboValue(minuteCombo, pref);
+
+     // Code scan options
+     pref = Preferences.getPreference("reporter_scan_file");
+     if (pref != null && !pref.equals("")) scanField.setText(pref);
+     pref = Preferences.getPreference("reporter_perform_scan");
+     if (pref != null) scanCheck.setSelected(pref.equals("Yes"));
+ }
+
+ /**
+  * Selects the first item of the combo box whose string form equals the given value.
+  * The selection is left unchanged when no item matches.
+  *
+  * @param box the combo box to search and update
+  * @param value the text to look for among the items
+  */
+ private static void selectComboValue(JComboBox box, String value) {
+     int index = 0;
+     while (index < box.getItemCount()) {
+         String itemText = box.getItemAt(index).toString();
+         if (itemText.equals(value)) {
+             box.setSelectedIndex(index);
+             return;
+         }
+         index++;
+     }
+ }
+
+ private void saveDefaults() {
+ // Save format options
+ Preferences.setPreference("reporter_format_text_enabled", (textCheck.isSelected() ? "Yes" : "No"));
+ Preferences.setPreference("reporter_format_text_file", textField.getText());
+ Preferences.setPreference("reporter_format_text_detail", textCombo.getSelectedItem().toString());
+ Preferences.setPreference("reporter_format_html_enabled", (htmlCheck.isSelected() ? "Yes" : "No"));
+ Preferences.setPreference("reporter_format_html_file", htmlField.getText());
+ Preferences.setPreference("reporter_format_html_detail", htmlCombo.getSelectedItem().toString());
+ Preferences.setPreference("reporter_format_xml_enabled", (xmlCheck.isSelected() ? "Yes" : "No"));
+ Preferences.setPreference("reporter_format_xml_file", xmlField.getText());
+ Preferences.setPreference("reporter_format_xml_detail", xmlCombo.getSelectedItem().toString());
+ Preferences.setPreference("reporter_scan_file", scanField.getText());
+ Preferences.setPreference("reporter_perform_scan", (scanCheck.isSelected() ? "Yes" : "No"));
+ // Save interval options
+ Preferences.setPreference("reporter_interval", (sequentialRadio.isSelected() ? "Sequential" : "Defined"));
+ Preferences.setPreference("reporter_interval_sequential_value", valueCombo.getSelectedItem().toString());
+ Preferences.setPreference("reporter_interval_sequential_units", unitCombo.getSelectedItem().toString());
+ Preferences.setPreference("reporter_interval_defined_hour", hourCombo.getSelectedItem().toString());
+ Preferences.setPreference("reporter_interval_defined_minute", minuteCombo.getSelectedItem().toString());
+ Preferences.setPreference("reporter_interval_defined_day", dayCombo.getSelectedItem().toString());
+ // Save system options
+ Preferences.setPreference("reporter_enabled", (running ? "Yes" : "No"));
+ // Write the preferences
+ try {
+ Preferences.savePreferences();
+ } catch (IOException ioe) {
+ // TODO: Warn of error through JOptionPane
+ ioe.printStackTrace();
+ }
+ }
+
+ /**
+  * Flips the running flag and refreshes the status label and button to match.
+  */
+ private void toggleStatus() {
+     running = !running;
+     updateStatusComponents();
+ }
+
+ private void initComponents() {
+
+ // File choosers
+ bundleFileChooser.setFileFilter(new javax.swing.filechooser.FileFilter() {
+ public boolean accept(File f) {
+ if (f.isDirectory()) return true;
+
+ String name = f.getName();
+ if (!(name.toLowerCase().endsWith(".properties"))) return false;
+ if (name.indexOf("_") > 0) return false;
+ return true;
+ }
+
+ public String getDescription() {
+ return Resources.getTranslation("dialog_file_filter_description");
+ }
+ });
+ bundleFileChooser.setSelectedFile(new File(Preferences.getPreference("reporter_base_class_file")));
+
+ directoryFileChooser.setFileFilter(new javax.swing.filechooser.FileFilter() {
+ public boolean accept(File f) {
+ if (f.isDirectory()) return true;
+ return false;
+ }
+
+ public String getDescription() {
+ return Resources.getTranslation("directory");
+ }
+ });
+ directoryFileChooser.setSelectedFile(new File(Preferences.getPreference("reporter_output_directory")));
+
+ scanFileChooser.setFileFilter(new javax.swing.filechooser.FileFilter() {
+ public boolean accept(File f) {
+ if (f.isDirectory()) return true;
+ if (f.getName().endsWith(".xml")) return true;
+ return false;
+ }
+
+ public String getDescription() {
+ return Resources.getTranslation("dialog_file_filter_description_scan");
+ }
+ });
+ scanFileChooser.setSelectedFile(new File(Preferences.getPreference("reporter_scan_file")));
+
+ // New top level components
+ JPanel statusPanel = new JPanel();
+ JPanel intervalPanel = new JPanel();
+ JPanel optionsPanel = new JPanel();
+ JPanel formatPanel = new JPanel();
+ Box mainBox = new Box(BoxLayout.Y_AXIS);
+ int width = 600;
+ int height = 600;
+ int compHeight = 20;
+ Dimension mainDim = new Dimension(width,height);
+
+ statusPanel.setBorder(BorderFactory.createTitledBorder(BorderFactory.createEtchedBorder(),
+ Resources.getTranslation("reporter_panel_status")));
+ intervalPanel.setBorder(BorderFactory.createTitledBorder(BorderFactory.createEtchedBorder(),
+ Resources.getTranslation("reporter_panel_interval")));
+ optionsPanel.setBorder(BorderFactory.createTitledBorder(BorderFactory.createEtchedBorder(),
+ Resources.getTranslation("reporter_panel_options")));
+ formatPanel.setBorder(BorderFactory.createTitledBorder(BorderFactory.createEtchedBorder(),
+ Resources.getTranslation("reporter_panel_output_format")));
+
+ // ** STATUS PANEL SETUP **
+ JButton nowButton = new JButton(Resources.getTranslation("reporter_button_now"));
+ Box statusBox = new Box(BoxLayout.Y_AXIS);
+ JPanel statusPanel1 = new JPanel();
+ JPanel statusPanel2 = new JPanel();
+ JPanel statusPanel3 = new JPanel();
+ JPanel statusPanel4 = new JPanel();
+ statusButton = new JButton(Resources.getTranslation("reporter_button_start"));
+ statusLabel = new JLabel(Resources.getTranslation("reporter_status_stopped"));
+ nextReportLabel = new JLabel(Resources.getTranslation("reporter_next_report", "--"));
+ lastReportLabel = new JLabel(Resources.getTranslation("reporter_last_report", "--"));
+ statusLabel.setFont(new Font("serif",Font.BOLD,14));
+ statusLabel.setForeground(Color.red);
+ statusPanel2.setLayout(new FlowLayout(FlowLayout.LEFT));
+ statusPanel3.setLayout(new FlowLayout(FlowLayout.LEFT));
+ statusPanel.setLayout(new BorderLayout());
+
+ nowButton.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent ev) {
+ try {
+ generateReports();
+ } catch (Exception e) {
+ JOptionPane.showMessageDialog(null, e.getMessage(), Resources.getTranslation("error"),
+ JOptionPane.ERROR_MESSAGE);
+ RBManagerGUI.debugMsg(e.toString());
+ if (RBManagerGUI.debug) e.printStackTrace(System.err);
+ }
+ }
+ });
+
+ statusButton.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent ev) {
+ toggleStatus();
+ }
+ });
+
+ statusPanel1.add(statusLabel);
+ statusPanel2.add(nextReportLabel);
+ statusPanel3.add(lastReportLabel);
+ statusPanel4.add(nowButton);
+ statusPanel4.add(Box.createHorizontalStrut(7));
+ statusPanel4.add(statusButton);
+ statusBox.add(statusPanel1);
+ statusBox.add(Box.createVerticalStrut(7));
+ //statusBox.add(Box.createHorizontalGlue());
+ statusBox.add(statusPanel2);
+ //statusBox.add(Box.createHorizontalGlue());
+ statusBox.add(statusPanel3);
+ statusBox.add(Box.createVerticalStrut(7));
+ statusBox.add(statusPanel4);
+ statusPanel.add(statusBox, BorderLayout.CENTER);
+
+ // ** OPTIONS PANEL SETUP **
+ JLabel inputLabel = new JLabel(Resources.getTranslation("reporter_input_bundle"));
+ JLabel outputLabel = new JLabel(Resources.getTranslation("reporter_output_directory"));
+ JButton inputButton = new JButton(Resources.getTranslation("reporter_button_choose"));
+ JButton outputButton = new JButton(Resources.getTranslation("reporter_button_choose"));
+ JButton scanButton = new JButton(Resources.getTranslation("reporter_button_choose"));
+ JButton defaultButton = new JButton(Resources.getTranslation("reporter_button_save_defaults"));
+ JLabel textLabel = new JLabel(Resources.getTranslation("reporter_output_file"));
+ JLabel htmlLabel = new JLabel(Resources.getTranslation("reporter_output_file"));
+ JLabel xmlLabel = new JLabel(Resources.getTranslation("reporter_output_file"));
+ JLabel textLabel2 = new JLabel(Resources.getTranslation("reporter_detail_level"));
+ JLabel htmlLabel2 = new JLabel(Resources.getTranslation("reporter_detail_level"));
+ JLabel xmlLabel2 = new JLabel(Resources.getTranslation("reporter_detail_level"));
+ JPanel optionsPanel1 = new JPanel();
+ JPanel optionsPanel2 = new JPanel();
+ JPanel optionsPanelA = new JPanel();
+ JPanel optionsPanel3 = new JPanel();
+ JPanel optionsPanel4 = new JPanel();
+ JPanel optionsPanel5 = new JPanel();
+ JPanel optionsPanel6 = new JPanel();
+ Box optionsBox = new Box(BoxLayout.Y_AXIS);
+ Box outputBox = new Box(BoxLayout.Y_AXIS);
+
+ bundleField = new JTextField(Preferences.getPreference("reporter_base_class_file"));
+ directoryField = new JTextField(Preferences.getPreference("reporter_output_directory"));
+ textCheck = new JCheckBox(Resources.getTranslation("reporter_format_text"));
+ htmlCheck = new JCheckBox(Resources.getTranslation("reporter_format_html"));
+ xmlCheck = new JCheckBox(Resources.getTranslation("reporter_format_xml"));
+ scanCheck = new JCheckBox(Resources.getTranslation("reporter_perform_scan"), false);
+ textField = new JTextField("report.txt");
+ htmlField = new JTextField("report.html");
+ xmlField = new JTextField("report.xml");
+ scanField = new JTextField();
+ String [] detailLevels = {Resources.getTranslation("reporter_detail_high"),
+ Resources.getTranslation("reporter_detail_normal")};
+ textCombo = new JComboBox(detailLevels);
+ htmlCombo = new JComboBox(detailLevels);
+ xmlCombo = new JComboBox(detailLevels);
+
+ bundleField.setColumns(30);
+ directoryField.setColumns(30);
+ scanField.setColumns(30);
+ textField.setColumns(15);
+ htmlField.setColumns(15);
+ xmlField.setColumns(15);
+ Dimension checkDim = new Dimension(55,compHeight);
+ textCheck.setPreferredSize(checkDim);
+ htmlCheck.setPreferredSize(checkDim);
+ xmlCheck.setPreferredSize(checkDim);
+ optionsPanel1.setLayout(new FlowLayout(FlowLayout.RIGHT));
+ optionsPanel2.setLayout(new FlowLayout(FlowLayout.RIGHT));
+ optionsPanelA.setLayout(new FlowLayout(FlowLayout.RIGHT));
+
+ inputButton.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent ev) {
+ setInputBundle();
+ }
+ });
+
+ outputButton.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent ev) {
+ setOutputBundle();
+ }
+ });
+
+ scanButton.addActionListener(new ActionListener(){
+ public void actionPerformed(ActionEvent ev) {
+ setScanFile();
+ }
+ });
+
+ defaultButton.addActionListener(new ActionListener() {
+ public void actionPerformed(ActionEvent ev) {
+ saveDefaults();
+ }
+ });
+
+ optionsPanel6.add(defaultButton);
+ optionsPanel3.add(textCheck);
+ optionsPanel3.add(Box.createHorizontalStrut(5));
+ optionsPanel3.add(textLabel);
+ optionsPanel3.add(Box.createHorizontalStrut(5));
+ optionsPanel3.add(textField);
+ optionsPanel3.add(Box.createHorizontalStrut(5));
+ optionsPanel3.add(textLabel2);
+ optionsPanel3.add(Box.createHorizontalStrut(5));
+ optionsPanel3.add(textCombo);
+ optionsPanel4.add(htmlCheck);
+ optionsPanel4.add(Box.createHorizontalStrut(5));
+ optionsPanel4.add(htmlLabel);
+ optionsPanel4.add(Box.createHorizontalStrut(5));
+ optionsPanel4.add(htmlField);
+ optionsPanel4.add(Box.createHorizontalStrut(5));
+ optionsPanel4.add(htmlLabel2);
+ optionsPanel4.add(Box.createHorizontalStrut(5));
+ optionsPanel4.add(htmlCombo);
+ optionsPanel5.add(xmlCheck);
+ optionsPanel5.add(Box.createHorizontalStrut(5));
+ optionsPanel5.add(xmlLabel);
+ optionsPanel5.add(Box.createHorizontalStrut(5));
+ optionsPanel5.add(xmlField);
+ optionsPanel5.add(Box.createHorizontalStrut(5));
+ optionsPanel5.add(xmlLabel2);
+ optionsPanel5.add(Box.createHorizontalStrut(5));
+ optionsPanel5.add(xmlCombo);
+ outputBox.add(optionsPanel3);
+ outputBox.add(optionsPanel4);
+ outputBox.add(optionsPanel5);
+ formatPanel.add(outputBox);
+ optionsPanel1.add(inputLabel);
+ optionsPanel1.add(Box.createHorizontalStrut(5));
+ optionsPanel1.add(bundleField);
+ optionsPanel1.add(Box.createHorizontalStrut(5));
+ optionsPanel1.add(inputButton);
+ optionsPanel2.add(outputLabel);
+ optionsPanel2.add(Box.createHorizontalStrut(5));
+ optionsPanel2.add(directoryField);
+ optionsPanel2.add(Box.createHorizontalStrut(5));
+ optionsPanel2.add(outputButton);
+ optionsPanelA.add(scanCheck);
+ optionsPanelA.add(Box.createHorizontalStrut(5));
+ optionsPanelA.add(scanField);
+ optionsPanelA.add(Box.createHorizontalStrut(5));
+ optionsPanelA.add(scanButton);
+ optionsBox.add(optionsPanel1);
+ optionsBox.add(optionsPanel2);
+ optionsBox.add(optionsPanelA);
+ optionsBox.add(formatPanel);
+ optionsBox.add(optionsPanel6);
+ optionsPanel.add(optionsBox);
+
+ // ** INTERVAL PANEL SETUP **
+ String boxArray1[] = {"1","2","3","4","5","6","7","8","9","10","11","12","15","20","24","25","30"};
+ String boxArray2[] = {Resources.getTranslation("reporter_time_minutes"),
+ Resources.getTranslation("reporter_time_hours"),
+ Resources.getTranslation("reporter_time_days")};
+ String boxArray3[] = {"1","2","3","4","5","6","7","8","9","10","11","12",
+ "13","14","15","16","17","18","19","20","21","22","23","0"};
+ String boxArray4[] = {"00","15","30","45"};
+ String boxArray5[] = {Resources.getTranslation("reporter_time_everyday"),
+ Resources.getTranslation("reporter_time_monday"),
+ Resources.getTranslation("reporter_time_tuesday"),
+ Resources.getTranslation("reporter_time_wednesday"),
+ Resources.getTranslation("reporter_time_thursday"),
+ Resources.getTranslation("reporter_time_friday"),
+ Resources.getTranslation("reporter_time_saturday"),
+ Resources.getTranslation("reporter_time_sunday")};
+
+ JLabel colonLabel = new JLabel(":");
+ sequentialRadio = new JRadioButton(Resources.getTranslation("reporter_interval_sequential"));
+ definedRadio = new JRadioButton(Resources.getTranslation("reporter_interval_defined"), true);
+ valueCombo = new JComboBox(boxArray1);
+ unitCombo = new JComboBox(boxArray2);
+ hourCombo = new JComboBox(boxArray3);
+ minuteCombo = new JComboBox(boxArray4);
+ dayCombo = new JComboBox(boxArray5);
+ JPanel intervalPanel1 = new JPanel();
+ JPanel intervalPanel2 = new JPanel();
+ intervalPanel1.setLayout(new FlowLayout(FlowLayout.LEFT));
+ intervalPanel2.setLayout(new FlowLayout(FlowLayout.LEFT));
+ Box intervalBox = new Box(BoxLayout.Y_AXIS);
+ intervalPanel.setLayout(new BorderLayout());
+
+ ButtonGroup bg = new ButtonGroup();
+ bg.add(sequentialRadio);
+ bg.add(definedRadio);
+
+ intervalPanel1.add(sequentialRadio);
+ intervalPanel1.add(Box.createHorizontalStrut(5));
+ intervalPanel1.add(valueCombo);
+ intervalPanel1.add(Box.createHorizontalStrut(5));
+ intervalPanel1.add(unitCombo);
+ intervalPanel2.add(definedRadio);
+ intervalPanel2.add(Box.createHorizontalStrut(5));
+ intervalPanel2.add(hourCombo);
+ intervalPanel2.add(colonLabel);
+ intervalPanel2.add(minuteCombo);
+ intervalPanel2.add(Box.createHorizontalStrut(5));
+ intervalPanel2.add(dayCombo);
+ intervalBox.add(intervalPanel1);
+ intervalBox.add(intervalPanel2);
+ intervalPanel.add(intervalBox, BorderLayout.WEST);
+
+ // ** MAINBOX SETUP **
+ mainBox.removeAll();
+ mainBox.add(statusPanel);
+ mainBox.add(intervalPanel);
+ mainBox.add(optionsPanel);
+
+ // ** MAIN FRAME SETUP **
+ setLocation(new java.awt.Point(25, 25));
+ setSize(mainDim);
+ //((JComponent)getContentPane()).setMaximumSize(dimMainMax);
+ //((JComponent)getContentPane()).setMinimumSize(dimMainMin);
+ //setJMenuBar(jMenuBarMain);
+ getContentPane().setLayout(new BorderLayout());
+ getContentPane().removeAll();
+ getContentPane().add(mainBox, BorderLayout.CENTER);
+ setTitle(Resources.getTranslation("resource_bundle_reporter"));
+ //validateTree();
+ setComponentsToDefaults();
+ nextReport = generateNextReportDate();
+ updateDateFields();
+ repaint();
+
+ addWindowListener(new java.awt.event.WindowAdapter() {
+ public void windowClosing(java.awt.event.WindowEvent ev) {
+ thisWindowClosing(ev);
+ }
+ });
+ }
+
+ /**
+  * Shuts the reporter down: hides the window, disposes of it and then terminates the JVM
+  * with exit status 0.
+  *
+  * @param ev the window event that triggered the shutdown; it is not read by this method
+  *           (main() passes null after a "-now" run)
+  */
+ public void thisWindowClosing(WindowEvent ev) {
+ setVisible(false);
+ dispose();
+ // Terminates the whole process; nothing after this call is executed.
+ System.exit(0);
+ }
+
+ /**
+  * Asks the user to choose the input bundle file. When a file is approved, its absolute
+  * path is shown in the bundle field and stored under the "reporter_base_class_file"
+  * preference, and the preferences are saved.
+  */
+ private void setInputBundle() {
+     if (bundleFileChooser.showOpenDialog(this) != JFileChooser.APPROVE_OPTION) {
+         return;
+     }
+     File chosen = bundleFileChooser.getSelectedFile();
+     if (chosen == null) {
+         return;
+     }
+     String path = chosen.getAbsolutePath();
+     bundleField.setText(path);
+     Preferences.setPreference("reporter_base_class_file", path);
+     try {
+         Preferences.savePreferences();
+     } catch (IOException ioe) {
+         // A failure to save the preferences is ignored; the field already shows the choice.
+     }
+ }
+
+ /**
+  * Asks the user to choose the output directory. When the selection is a plain file its
+  * containing directory is used instead. The absolute path of the directory is shown in
+  * the directory field and stored under the "reporter_output_directory" preference, and
+  * the preferences are saved.
+  */
+ private void setOutputBundle() {
+     int result = directoryFileChooser.showOpenDialog(this);
+     if (result != JFileChooser.APPROVE_OPTION) {
+         return;
+     }
+     File f = directoryFileChooser.getSelectedFile();
+     // Check for null before the first dereference (the original tested it too late).
+     if (f == null) {
+         return;
+     }
+     if (!f.isDirectory()) {
+         // getParent() is null for a path without a parent component, which made
+         // new File(f.getParent()) throw; resolving to an absolute file first always
+         // yields the containing directory when one exists.
+         f = f.getAbsoluteFile().getParentFile();
+         if (f == null) {
+             return;
+         }
+     }
+     String path = f.getAbsolutePath();
+     directoryField.setText(path);
+     Preferences.setPreference("reporter_output_directory", path);
+     try {
+         Preferences.savePreferences();
+     } catch (IOException ioe) {
+         // A failure to save the preferences is ignored; the field already shows the choice.
+     }
+ }
+
+ /**
+  * Asks the user to choose the scan configuration file. When a file is approved, its
+  * absolute path is shown in the scan field and stored under the "reporter_scan_file"
+  * preference, and the preferences are saved.
+  */
+ private void setScanFile() {
+     if (scanFileChooser.showOpenDialog(this) != JFileChooser.APPROVE_OPTION) {
+         return;
+     }
+     File chosen = scanFileChooser.getSelectedFile();
+     if (chosen == null) {
+         return;
+     }
+     String path = chosen.getAbsolutePath();
+     scanField.setText(path);
+     Preferences.setPreference("reporter_scan_file", path);
+     try {
+         Preferences.savePreferences();
+     } catch (IOException ioe) {
+         // A failure to save the preferences is ignored; the field already shows the choice.
+     }
+ }
+
+ /**
+  * Builds the command line help text printed by main() for unrecognized arguments.
+  *
+  * @return the multi-line usage message
+  */
+ private static String getUsage() {
+     StringBuffer usage = new StringBuffer();
+     usage.append("\nRBReporter Command Line Usage:\n\n");
+     usage.append("Default Usage (GUI): java com.ibm.rbm.RBReporter\n");
+     usage.append("Options Usage: java com.ibm.rbm.RBReporter [-gui | -now | -line]\n\n");
+     usage.append("Options: -gui Run the Graphical User Interface\n");
+     usage.append(" -now Execute the Report Generation Immediately\n");
+     usage.append(" -line Run the Reporter without the GUI");
+     return usage.toString();
+ }
+
+ /**
+  * Command line entry point.
+  * <ul>
+  * <li>no argument or "-gui": creates the reporter with its constructor flag set to true</li>
+  * <li>"-now": creates the reporter with the flag set to false, generates the reports once,
+  *     then shuts down through thisWindowClosing()</li>
+  * <li>"-line": creates the reporter with the flag set to false and starts it if it is not
+  *     already running</li>
+  * <li>anything else: prints the usage text</li>
+  * </ul>
+  *
+  * @param args the command line arguments
+  */
+ public static void main(String args[]) {
+     if (args.length > 1) {
+         System.out.println(getUsage());
+         return;
+     }
+     if (args.length == 0) {
+         new RBReporter(true);
+         return;
+     }
+
+     String option = args[0];
+     if (option.equals("-gui")) {
+         new RBReporter(true);
+     } else if (option.equals("-now")) {
+         RBReporter reporter = new RBReporter(false);
+         try {
+             reporter.generateReports();
+             System.out.println("RBReporter: Generation of reports successful. " + new Date());
+         } catch (IOException ioe) {
+             System.out.println("There was an error generating the reports...\n\n\t" + ioe.getMessage());
+         }
+         // Shuts the reporter down whether or not the generation succeeded.
+         reporter.thisWindowClosing(null);
+     } else if (option.equals("-line")) {
+         RBReporter reporter = new RBReporter(false);
+         if (!reporter.running) {
+             reporter.toggleStatus();
+         }
+         System.out.println("RBReporter: Next Report at " + reporter.nextReport.toString());
+     } else {
+         System.out.println(getUsage());
+     }
+ }
+
+}
\ No newline at end of file
diff --git a/unicodetools/com/ibm/rbm/RBReporterScanner.java b/unicodetools/com/ibm/rbm/RBReporterScanner.java
new file mode 100644
index 0000000..4ed340c
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/RBReporterScanner.java
@@ -0,0 +1,304 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+
+import java.io.*;
+import java.util.*;
+
+import org.apache.xerces.parsers.DOMParser;
+import org.w3c.dom.*;
+import org.xml.sax.*;
+
+/**
+ * RBReporterScanner is a utility class for RBReporter. It creates a report from an XML settings
+ * file: the code named by that file is scanned for resources, which are then compared against
+ * a resource bundle.
+ *
+ * @author Jared Jackson
+ * @see com.ibm.rbm.RBReporter
+ */
+public class RBReporterScanner {
+ private Bundle bundle;
+ private Document config;
+ private Hashtable fileRules;
+ private Hashtable parseRules;
+ private Hashtable results;
+ private Hashtable missing;
+ private boolean resultsFound;
+
+ /**
+  * Creates a scanner for the given bundle, configured by an XML settings file.
+  * The file rules and parse rules are read from the configuration, and one empty
+  * ScanResult is registered for every item of the bundle.
+  *
+  * @param bundle the bundle whose items are the known resources
+  * @param configFile the XML settings file
+  * @throws IOException if the file cannot be read or is not a legal XML document
+  */
+ protected RBReporterScanner(Bundle bundle, File configFile) throws IOException {
+     resultsFound = false;
+     this.bundle = bundle;
+
+     InputStream in = new FileInputStream(configFile);
+     try {
+         DOMParser parser = new DOMParser();
+         parser.parse(new InputSource(in));
+         config = parser.getDocument();
+     } catch (SAXException saxe) {
+         IOException wrapped = new IOException("Illegal XML Document: " + saxe.getMessage());
+         wrapped.initCause(saxe);
+         throw wrapped;
+     } finally {
+         // The stream was previously left open; release it whether or not parsing succeeded.
+         try {
+             in.close();
+         } catch (IOException ignored) {
+             // Nothing useful can be done about a failed close of a read-only stream.
+         }
+     }
+
+     Element root = config.getDocumentElement();
+     fileRules = getFileRules(root);
+     parseRules = getParseRules(root);
+
+     results = new Hashtable();
+     // "enum" is a reserved word since Java 5, hence the different variable name.
+     Enumeration keys = bundle.allItems.keys();
+     while (keys.hasMoreElements()) {
+         String key = (String)keys.nextElement();
+         BundleItem item = (BundleItem)bundle.allItems.get(key);
+         results.put(key, new ScanResult(item));
+     }
+
+     missing = new Hashtable();
+ }
+
+ protected int getNumberResourcesFound() {
+ return results.size();
+ }
+
+ protected int getNumberMissingResources() {
+ return missing.size();
+ }
+
+ /**
+  * Counts the scan results that have no recorded occurrence.
+  *
+  * @return the number of results whose occurrence list is empty
+  */
+ protected int getNumberUnusedResources() {
+     int count = 0;
+     // "enum" is a reserved word since Java 5, hence the different variable name.
+     for (Enumeration scanned = results.elements(); scanned.hasMoreElements();) {
+         ScanResult result = (ScanResult)scanned.nextElement();
+         if (result.getOccurances().size() < 1) {
+             count++;
+         }
+     }
+     return count;
+ }
+
+ /**
+  * Returns the scan results recorded for resource names that are not in the bundle.
+  *
+  * @return a new Vector holding every value of the missing-resources table
+  */
+ protected Vector getMissingResources() {
+     Vector v = new Vector();
+     // "enum" is a reserved word since Java 5, hence the different variable name.
+     for (Enumeration absent = missing.elements(); absent.hasMoreElements();) {
+         v.addElement(absent.nextElement());
+     }
+     return v;
+ }
+
+ /**
+  * Returns the scan results that have no recorded occurrence.
+  *
+  * @return a new Vector of the ScanResult objects whose occurrence list is empty
+  */
+ protected Vector getUnusedResources() {
+     Vector v = new Vector();
+     // "enum" is a reserved word since Java 5, hence the different variable name.
+     for (Enumeration scanned = results.elements(); scanned.hasMoreElements();) {
+         ScanResult result = (ScanResult)scanned.nextElement();
+         if (result.getOccurances().size() < 1) {
+             v.addElement(result);
+         }
+     }
+     return v;
+ }
+
+ /**
+  * Runs the scan described by the first "Scan" element of the configuration. For every
+  * "Directory" element, each "ApplyFileRule" of its first "Rules" element that names a
+  * known file rule is applied together with the known parse rules named by its
+  * "ApplyParseRule" children. Combinations without a usable parse rule are skipped.
+  *
+  * @return the value of the resultsFound flag.
+  *         NOTE(review): nothing in the visible code of this class sets the flag to true,
+  *         so this looks like it always returns false - confirm against the callers.
+  * @throws IOException if a scanned file cannot be read
+  */
+ protected boolean performScan() throws IOException {
+     resultsFound = false;
+
+     NodeList scans = config.getDocumentElement().getElementsByTagName("Scan");
+     if (scans.getLength() < 1) {
+         return resultsFound;
+     }
+     Element scanElem = (Element)scans.item(0);
+     NodeList directories = scanElem.getElementsByTagName("Directory");
+     for (int d = 0; d < directories.getLength(); d++) {
+         Element dirElem = (Element)directories.item(d);
+         File directory = new File(dirElem.getAttribute("location"));
+         boolean recurse = dirElem.getAttribute("recurse_directories").equalsIgnoreCase("true");
+         NodeList rulesList = dirElem.getElementsByTagName("Rules");
+         if (rulesList.getLength() < 1) {
+             continue;
+         }
+         Element rulesElem = (Element)rulesList.item(0);
+         NodeList fileRuleRefs = rulesElem.getElementsByTagName("ApplyFileRule");
+         for (int f = 0; f < fileRuleRefs.getLength(); f++) {
+             Element fileRuleRef = (Element)fileRuleRefs.item(f);
+             FileRule fileRule = (FileRule)fileRules.get(fileRuleRef.getAttribute("name"));
+             if (fileRule == null) {
+                 continue;
+             }
+             // Collect the parse rules referenced by name, dropping unknown names.
+             NodeList parseRuleRefs = fileRuleRef.getElementsByTagName("ApplyParseRule");
+             Vector applicable = new Vector();
+             for (int p = 0; p < parseRuleRefs.getLength(); p++) {
+                 Element parseRuleRef = (Element)parseRuleRefs.item(p);
+                 ParseRule parseRule = (ParseRule)parseRules.get(parseRuleRef.getAttribute("name"));
+                 if (parseRule != null) {
+                     applicable.addElement(parseRule);
+                 }
+             }
+             if (applicable.size() >= 1) {
+                 scanDirectory(directory, fileRule, applicable, recurse);
+             }
+         }
+     }
+
+     return resultsFound;
+ }
+
+ /**
+  * Scans the files of a directory that are accepted by the file rule, optionally
+  * descending into sub-directories first.
+  *
+  * @param directory the directory to scan; a path that cannot be listed is skipped
+  * @param frule the rule deciding, by file name, which files are scanned
+  * @param prules the ParseRule objects applied to every line of every scanned file
+  * @param recurse whether sub-directories are scanned as well
+  * @throws IOException if a file cannot be read
+  */
+ private void scanDirectory(File directory, FileRule frule, Vector prules, boolean recurse) throws IOException {
+     File children[] = directory.listFiles();
+     // listFiles() returns null when the path is not a directory or cannot be read;
+     // the original dereferenced the result unconditionally.
+     if (children == null) {
+         return;
+     }
+
+     // Recursion step
+     if (recurse) {
+         for (int i = 0; i < children.length; i++) {
+             if (children[i].isDirectory()) {
+                 scanDirectory(children[i], frule, prules, recurse);
+             }
+         }
+     }
+
+     // Go through each acceptable file
+     for (int i = 0; i < children.length; i++) {
+         File f = children[i];
+         if (f.isDirectory() || !frule.applyRule(f.getName())) {
+             continue;
+         }
+         scanFile(f, prules);
+     }
+ }
+
+ /**
+  * Applies every parse rule to every line of one file and records each name found:
+  * names already present in the results table get a further occurrence, other names are
+  * registered as "*unknown*" items in both the missing and the results tables.
+  */
+ private void scanFile(File f, Vector prules) throws IOException {
+     BufferedReader br = new BufferedReader(new FileReader(f));
+     try {
+         String line = null;
+         int line_count = 0;
+         // Read the file line by line
+         while ((line = br.readLine()) != null) {
+             line_count++;
+             for (int j = 0; j < prules.size(); j++) {
+                 ParseRule prule = (ParseRule)prules.elementAt(j);
+                 Vector findings = prule.applyRule(line);
+                 for (int k = 0; k < findings.size(); k++) {
+                     String name = (String)findings.elementAt(k);
+                     Occurance occ = new Occurance(f.getName(), f.getAbsolutePath(), line_count);
+                     ScanResult scan_res = (ScanResult)results.get(name);
+                     if (scan_res == null) {
+                         // Not a known resource: add it to the missing results
+                         scan_res = new ScanResult(new BundleItem(null, name, "*unknown*"));
+                         missing.put(name, scan_res);
+                         results.put(name, scan_res);
+                     }
+                     scan_res.addOccurance(occ);
+                 }
+             }
+         }
+     } finally {
+         // The reader was previously never closed, leaking one file handle per scanned file.
+         br.close();
+     }
+ }
+
+ /**
+  * Reads the file rules of the configuration: every "FileRule" element below the first
+  * "FileRules" element becomes a FileRule built from its "name", "starts_with",
+  * "ends_with" and "contains" attributes.
+  *
+  * @param root the root element of the configuration document
+  * @return a table mapping rule names to FileRule objects; empty when there is no
+  *         "FileRules" element
+  */
+ private Hashtable getFileRules(Element root) {
+     Hashtable rulesByName = new Hashtable();
+     NodeList containers = root.getElementsByTagName("FileRules");
+     Element container = (containers.getLength() > 0) ? (Element)containers.item(0) : null;
+     if (container == null) {
+         return rulesByName;
+     }
+     NodeList ruleElems = container.getElementsByTagName("FileRule");
+     for (int i = 0; i < ruleElems.getLength(); i++) {
+         Element ruleElem = (Element)ruleElems.item(i);
+         String ruleName = ruleElem.getAttribute("name");
+         String prefix = ruleElem.getAttribute("starts_with");
+         String suffix = ruleElem.getAttribute("ends_with");
+         String infix = ruleElem.getAttribute("contains");
+         rulesByName.put(ruleElem.getAttribute("name"), new FileRule(ruleName, prefix, suffix, infix));
+     }
+     return rulesByName;
+ }
+
+ /**
+  * Reads the parse rules of the configuration: every "ParseRule" element below the first
+  * "ParseRules" element becomes a ParseRule built from its "name", "follows" and
+  * "precedes" attributes.
+  *
+  * @param root the root element of the configuration document
+  * @return a table mapping rule names to ParseRule objects; empty when there is no
+  *         "ParseRules" element
+  */
+ private Hashtable getParseRules(Element root) {
+     Hashtable rulesByName = new Hashtable();
+     NodeList containers = root.getElementsByTagName("ParseRules");
+     Element container = (containers.getLength() > 0) ? (Element)containers.item(0) : null;
+     if (container == null) {
+         return rulesByName;
+     }
+     NodeList ruleElems = container.getElementsByTagName("ParseRule");
+     for (int i = 0; i < ruleElems.getLength(); i++) {
+         Element ruleElem = (Element)ruleElems.item(i);
+         String ruleName = ruleElem.getAttribute("name");
+         String leading = ruleElem.getAttribute("follows");
+         String trailing = ruleElem.getAttribute("precedes");
+         rulesByName.put(ruleElem.getAttribute("name"), new ParseRule(ruleName, leading, trailing));
+     }
+     return rulesByName;
+ }
+}
+
+/**
+ * A named file name filter. A name is accepted when it satisfies every constraint that is
+ * set (non-null and non-empty): a required prefix, a required suffix and a required
+ * substring.
+ */
+class FileRule {
+    String name;
+    String starts_with;
+    String ends_with;
+    String contains;
+
+    FileRule(String name, String starts_with, String ends_with, String contains) {
+        this.name = name;
+        this.starts_with = starts_with;
+        this.ends_with = ends_with;
+        this.contains = contains;
+    }
+
+    /**
+     * Tests a file name against the constraints of this rule.
+     *
+     * @param source the file name to test
+     * @return true when no constraint that is set rejects the name
+     */
+    boolean applyRule(String source) {
+        if (isSet(starts_with) && !source.startsWith(starts_with)) {
+            return false;
+        }
+        if (isSet(ends_with) && !source.endsWith(ends_with)) {
+            return false;
+        }
+        if (isSet(contains) && source.indexOf(contains) < 0) {
+            return false;
+        }
+        return true;
+    }
+
+    /** Tells whether a constraint has been given a non-empty value. */
+    private static boolean isSet(String constraint) {
+        return constraint != null && constraint.length() > 0;
+    }
+}
+
+/**
+ * A named text extraction rule: it extracts the parts of a line that follow the
+ * "before" delimiter and precede the "after" delimiter. A delimiter that is null or
+ * empty is treated as not set.
+ */
+class ParseRule {
+    String name;
+    String before;
+    String after;
+
+    ParseRule(String name, String before, String after) {
+        this.name = name;
+        this.before = before;
+        this.after = after;
+    }
+
+    /**
+     * Applies the rule to one line of text.
+     * <ul>
+     * <li>Both delimiters set: returns each non-empty text found between an occurrence of
+     *     "before" and the next occurrence of "after".</li>
+     * <li>Only "before" set: returns, for each occurrence, the non-empty rest of the line
+     *     that follows it.</li>
+     * <li>Only "after" set: returns, for each occurrence, the non-empty start of the line
+     *     that precedes it.</li>
+     * <li>Neither set: returns the line itself.</li>
+     * </ul>
+     *
+     * @param source the line to search
+     * @return a Vector of the Strings found, possibly empty
+     */
+    Vector applyRule(String source) {
+        Vector v = new Vector();
+        boolean hasBefore = before != null && before.length() > 0;
+        boolean hasAfter = after != null && after.length() > 0;
+
+        if (hasBefore && hasAfter) {
+            int from = 0;
+            int start;
+            while ((start = source.indexOf(before, from)) >= 0) {
+                int contentStart = start + before.length();
+                // Search right after the opening delimiter. The original skipped one extra
+                // character, so an empty match ran on to a later closing delimiter and
+                // returned unrelated text.
+                int end = source.indexOf(after, contentStart);
+                if (end < 0) {
+                    break;
+                }
+                if (end > contentStart) {
+                    v.addElement(source.substring(contentStart, end));
+                }
+                from = end + 1;
+            }
+        } else if (hasBefore) {
+            int start = -1;
+            while ((start = source.indexOf(before, start + 1)) >= 0) {
+                String rest = source.substring(start + before.length());
+                if (rest.length() > 0) {
+                    v.addElement(rest);
+                }
+            }
+        } else if (hasAfter) {
+            // The original looked up "before" (unset in this branch) instead of "after",
+            // which returned almost every prefix of the line or failed on a null delimiter.
+            int end = -1;
+            while ((end = source.indexOf(after, end + 1)) >= 0) {
+                if (end > 0) {
+                    v.addElement(source.substring(0, end));
+                }
+            }
+        } else {
+            v.addElement(source);
+        }
+        return v;
+    }
+}
\ No newline at end of file
diff --git a/unicodetools/com/ibm/rbm/RBTMXExporter.java b/unicodetools/com/ibm/rbm/RBTMXExporter.java
new file mode 100644
index 0000000..ac86c4f
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/RBTMXExporter.java
@@ -0,0 +1,225 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+
+import java.io.*;
+import javax.swing.*;
+import java.util.*;
+
+import org.apache.xerces.dom.DocumentImpl;
+import org.apache.xml.serialize.*;
+import org.w3c.dom.*;
+
+/**
+ * This class is a plug-in to RBManager that allows the user to export Resource Bundles
+ * along with some of the meta-data associated by RBManager to the TMX specification.
+ * For more information on TMX visit the web site <a href="http://www.lisa.org/tmx/">http://www.lisa.org/tmx/</a>
+ *
+ * @author Jared Jackson
+ * @see com.ibm.rbm.RBManager
+ */
+public class RBTMXExporter extends RBExporter {
+ private static final String VERSION = "0.5a";
+
+ /**
+  * Creates the TMX exporter. When no file chooser exists yet, one is created that shows
+  * directories and files whose name ends with ".tmx".
+  */
+ public RBTMXExporter() {
+     super();
+
+     if (chooser != null) {
+         // A chooser is already available; leave it untouched.
+         return;
+     }
+     chooser = new JFileChooser();
+     chooser.setFileFilter(new javax.swing.filechooser.FileFilter() {
+         public boolean accept(File f) {
+             return f.isDirectory() || f.getName().endsWith(".tmx");
+         }
+         public String getDescription() {
+             return "TMX Files";
+         }
+     });
+ }
+
+ /**
+  * Formats a date as a TMX time stamp by loading it into a calendar and delegating to
+  * the GregorianCalendar overload.
+  *
+  * @param d the date to format
+  * @return the formatted time stamp
+  */
+ private String convertToISO(Date d) {
+     GregorianCalendar calendar = new GregorianCalendar();
+     calendar.setTime(d);
+     return convertToISO(calendar);
+ }
+
+ /**
+  * Formats a calendar as year, two-digit month, two-digit day, "T", two-digit hour,
+  * minute and second, followed by "Z" (for example 20040131T235959Z).
+  * NOTE(review): the fields are read in the calendar's own time zone although the "Z"
+  * suffix conventionally denotes UTC - confirm which is intended.
+  *
+  * @param gc the calendar to format
+  * @return the formatted time stamp
+  */
+ private String convertToISO(GregorianCalendar gc) {
+     StringBuffer stamp = new StringBuffer();
+     stamp.append(gc.get(Calendar.YEAR));
+     int twoDigitFields[] = {
+         gc.get(Calendar.MONTH) + 1,      // Calendar months are zero based
+         gc.get(Calendar.DAY_OF_MONTH),
+         gc.get(Calendar.HOUR_OF_DAY),
+         gc.get(Calendar.MINUTE),
+         gc.get(Calendar.SECOND)
+     };
+     for (int i = 0; i < twoDigitFields.length; i++) {
+         if (i == 2) {
+             // The time part starts after the day.
+             stamp.append('T');
+         }
+         if (twoDigitFields[i] < 10) {
+             stamp.append('0');
+         }
+         stamp.append(twoDigitFields[i]);
+     }
+     stamp.append('Z');
+     return stamp.toString();
+ }
+
+ /**
+  * Builds the language tag of the bundle an item belongs to, in the form
+  * language, language-country or language-country-variant.
+  *
+  * @param item the item whose parent bundle supplies the encodings
+  * @return the tag, or the empty string when the item, its group, its bundle or the
+  *         language encoding is missing
+  */
+ private String convertEncoding(BundleItem item) {
+     if (item == null || item.getParentGroup() == null
+             || item.getParentGroup().getParentBundle() == null) {
+         return "";
+     }
+     String language = item.getParentGroup().getParentBundle().getLanguageEncoding();
+     String country = item.getParentGroup().getParentBundle().getCountryEncoding();
+     String variant = item.getParentGroup().getParentBundle().getVariantEncoding();
+
+     if (language == null || language.equals("")) {
+         return "";
+     }
+     if (country == null || country.equals("")) {
+         return language;
+     }
+     if (variant == null || variant.equals("")) {
+         return language + "-" + country;
+     }
+     return language + "-" + country + "-" + variant;
+ }
+
+ /**
+  * Appends a "tuv" (translation unit variant) element describing one bundle item to a
+  * "tu" element. The variant carries the language, creation and change attributes, one
+  * "prop" each for the comment and the translated flag, one "x-Lookup" prop per lookup
+  * (written as key=value) and the translation itself in a "seg" element.
+  *
+  * @param xml the document used to create the new nodes
+  * @param tu the translation unit element that receives the variant
+  * @param item the bundle item to describe
+  */
+ private void appendTUV(Document xml, Element tu, BundleItem item) {
+     Element tuv = xml.createElement("tuv");
+     tuv.setAttribute("lang", convertEncoding(item));
+     tuv.setAttribute("creationdate",convertToISO(item.getCreatedDate()));
+     tuv.setAttribute("creationid",item.getCreator());
+     tuv.setAttribute("changedate",convertToISO(item.getModifiedDate()));
+     tuv.setAttribute("changeid",item.getModifier());
+     // The original called getComment() and isTranslated() here and discarded the
+     // results; both values are read again below where they are actually used.
+
+     Element comment_prop = xml.createElement("prop");
+     comment_prop.appendChild(xml.createTextNode(item.getComment()));
+     comment_prop.setAttribute("type","x-Comment");
+     tuv.appendChild(comment_prop);
+
+     Element translated_prop = xml.createElement("prop");
+     translated_prop.appendChild(xml.createTextNode(String.valueOf(item.isTranslated())));
+     translated_prop.setAttribute("type","x-Translated");
+     tuv.appendChild(translated_prop);
+
+     Hashtable lookups = item.getLookups();
+     // "enum" is a reserved word since Java 5, hence the different variable name.
+     for (Enumeration keys = lookups.keys(); keys.hasMoreElements();) {
+         String key = (String)keys.nextElement();
+         String value = (String)lookups.get(key);
+         Element lookup_prop = xml.createElement("prop");
+         lookup_prop.appendChild(xml.createTextNode(key + "=" + value));
+         lookup_prop.setAttribute("type","x-Lookup");
+         tuv.appendChild(lookup_prop);
+     }
+
+     Element seg = xml.createElement("seg");
+     seg.appendChild(xml.createTextNode(item.getTranslation()));
+     tuv.appendChild(seg);
+
+     tu.appendChild(tuv);
+ }
+
+ /**
+  * Exports the bundles of a resource bundle manager to a single TMX file chosen by the
+  * user. Every item of the first bundle becomes a "tu" element holding one "tuv" for the
+  * item itself and one for each item with the same key found in a group of the same
+  * name in the other bundles.
+  *
+  * @param rbm the manager whose bundles are exported; nothing happens when it is null
+  * @throws IOException if the file cannot be written
+  */
+ public void export(RBManager rbm) throws IOException {
+     if (rbm == null) return;
+     // Open the Save Dialog
+     int ret_val = chooser.showSaveDialog(null);
+     if (ret_val != JFileChooser.APPROVE_OPTION) return;
+     // Retrieve basic file information
+     File file = chooser.getSelectedFile(); // The file we will be working with
+     if (file == null) return;
+     // getParentFile() may be null for a bare file name; new File((File)null, name) copes
+     // with that, whereas the original new File(file.getParent()) threw.
+     File directory = file.getParentFile(); // The directory we will be writing to
+     String base_name = file.getName(); // The base name of the file we will write
+     if (base_name == null || base_name.equals("")) base_name = rbm.getBaseClass();
+     if (base_name.endsWith(".tmx")) base_name = base_name.substring(0,base_name.length()-4);
+
+     String file_name = base_name + ".tmx";
+
+     Vector bundle_v = rbm.getBundles();
+     Bundle main_bundle = (Bundle)bundle_v.elementAt(0);
+
+     Document xml = new DocumentImpl();
+     Element root = xml.createElement("tmx");
+     root.setAttribute("version", "1.2");
+     xml.appendChild(root);
+
+     Element header = xml.createElement("header");
+     Element note = xml.createElement("note");
+     note.appendChild(xml.createTextNode("This document was created automatically by RBManager"));
+     header.appendChild(note);
+     header.setAttribute("creationtool", "RBManager");
+     header.setAttribute("creationtoolversion", VERSION);
+     header.setAttribute("datatype", "PlainText");
+     // Was misspelled "sentance", which is not one of the segtype values TMX defines.
+     header.setAttribute("segtype", "sentence");
+     header.setAttribute("adminlang", "en-us");
+     header.setAttribute("srclang", "EN");
+     header.setAttribute("o-tmf", "none");
+     header.setAttribute("creationdate", convertToISO(new Date()));
+     root.appendChild(header);
+
+     Element body = xml.createElement("body");
+     root.appendChild(body);
+
+     Vector group_v = main_bundle.getGroupsAsVector();
+     // Loop through each bundle group in main_bundle
+     for (int i=0; i < group_v.size(); i++) {
+         BundleGroup main_group = (BundleGroup)group_v.elementAt(i);
+         // Gather the groups of the other bundles that have the same name as main_group
+         Vector all_groups_v = new Vector();
+         for (int j=1; j < bundle_v.size(); j++) {
+             Bundle bundle = (Bundle)bundle_v.elementAt(j);
+             if (bundle.hasGroup(main_group.getName())) {
+                 Vector groups = bundle.getGroupsAsVector();
+                 for (int k=0; k < groups.size(); k++) {
+                     BundleGroup group = (BundleGroup)groups.elementAt(k);
+                     if (group.getName().equals(main_group.getName())) all_groups_v.addElement(group);
+                 }
+             }
+         } // end for - j
+         // Loop through each item in main_group
+         for (int j=0; j < main_group.getItemCount(); j++) {
+             BundleItem main_item = main_group.getBundleItem(j);
+             Element tu = xml.createElement("tu");
+             tu.setAttribute("tuid",main_item.getKey());
+             tu.setAttribute("datatype","Text");
+             // Insert the group name for the item
+             Element group_prop = xml.createElement("prop");
+             group_prop.appendChild(xml.createTextNode(main_group.getName()));
+             group_prop.setAttribute("type", "x-Group");
+             tu.appendChild(group_prop);
+             // Add the main_item to the xml
+             appendTUV(xml, tu, main_item);
+             // Loop through the rest of the groups of the same name as main_group
+             for (int k=0; k < all_groups_v.size(); k++) {
+                 BundleGroup group = (BundleGroup)all_groups_v.elementAt(k);
+                 // Add the first item of the group that has the same key
+                 for (int l=0; l < group.getItemCount(); l++) {
+                     BundleItem item = group.getBundleItem(l);
+                     if (item.getKey().equals(main_item.getKey())) {
+                         appendTUV(xml, tu, item);
+                         break;
+                     }
+                 } // end for - l
+             } // end for - k
+             body.appendChild(tu);
+         } // end for - j
+     } // end for - i
+
+     // Write with the charset the document declares. The original used a FileWriter,
+     // which encodes with the platform default charset, and never closed it.
+     FileOutputStream fos = new FileOutputStream(new File(directory,file_name));
+     try {
+         Writer out = new OutputStreamWriter(fos, "ISO-8859-1");
+         OutputFormat of = new OutputFormat(xml);
+         of.setIndenting(true);
+         of.setEncoding("ISO-8859-1");
+         XMLSerializer serializer = new XMLSerializer(out, of);
+         serializer.serialize(xml);
+         out.flush();
+     } finally {
+         fos.close();
+     }
+ }
+}
\ No newline at end of file
diff --git a/unicodetools/com/ibm/rbm/RBTMXImporter.java b/unicodetools/com/ibm/rbm/RBTMXImporter.java
new file mode 100644
index 0000000..1613c69
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/RBTMXImporter.java
@@ -0,0 +1,241 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+import java.io.*;
+import java.util.*;
+
+import javax.xml.parsers.*;
+
+import org.w3c.dom.*;
+import org.xml.sax.*;
+
+import com.ibm.rbm.gui.RBManagerGUI;
+
+
+/**
+ * This class is a plug-in to RBManager that allows the user to import resources, along with
+ * some of the meta-data associated with them by RBManager, from a file in the TMX format.
+ * The functionality common to all importers (setting up the options dialog and
+ * displaying it to the user, performing the actual insertions into the resource bundle manager,
+ * and managing any import conflicts) comes from the super class RBImporter.
+ *
+ * @author Jared Jackson
+ * @see com.ibm.rbm.RBManager
+ */
+public class RBTMXImporter extends RBImporter {
+
+ Document tmx_xml = null;
+
+ /**
+  * Creates the TMX importer. All three arguments are handed to the super class
+  * constructor unchanged.
+  *
+  * @param title the title of the import dialog
+  * @param rbm the resource bundle manager that receives the imported resources
+  * @param gui the RBManager user interface
+  */
+ public RBTMXImporter(String title, RBManager rbm, RBManagerGUI gui) {
+     super(title, rbm, gui);
+ }
+
+ /**
+  * Installs a file filter on the chooser that shows directories and files whose name
+  * ends with ".tmx". The filter description is the translation of
+  * "import_TMX_file_description".
+  */
+ protected void setupFileChooser() {
+     javax.swing.filechooser.FileFilter tmxFilter = new javax.swing.filechooser.FileFilter() {
+         public String getDescription() {
+             return Resources.getTranslation("import_TMX_file_description");
+         }
+
+         public boolean accept(File f) {
+             return f.isDirectory() || f.getName().endsWith(".tmx");
+         }
+     };
+     chooser.setFileFilter(tmxFilter);
+ }
+
+ /**
+  * Parses the chosen TMX file into a DOM document and imports its content.
+  *
+  * @throws IOException if the file cannot be read or parsed; the original exception is
+  *         attached as the cause
+  */
+ protected void beginImport() throws IOException {
+     super.beginImport();
+     File tmx_file = getChosenFile();
+
+     FileInputStream fis = null;
+     try {
+         fis = new FileInputStream(tmx_file);
+         InputSource is = new InputSource(fis);
+         DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+         tmx_xml = builder.parse(is);
+     } catch (Exception e) {
+         e.printStackTrace(System.err);
+         IOException wrapped = new IOException(e.getMessage());
+         wrapped.initCause(e);
+         throw wrapped;
+     } finally {
+         // The stream used to stay open whenever parsing failed.
+         if (fis != null) {
+             try {
+                 fis.close();
+             } catch (IOException ignored) {
+                 // Nothing useful can be done about a failed close of a read-only stream.
+             }
+         }
+     }
+     if (tmx_xml == null)
+         return;
+
+     importDoc();
+ }
+
+ /**
+  * Imports every translation unit of the parsed TMX document. The "tuid" attribute of a
+  * "tu" element is the resource key and its "x-Group" prop names the group (the default
+  * group is used otherwise). Each "tuv" element with a non-empty "seg" text becomes one
+  * BundleItem for the encoding named by its "lang" attribute ('-' replaced by '_'),
+  * carrying the creation and change attributes and the x-Comment, x-Translated and
+  * x-Lookup props.
+  * A document without a "body" element is ignored.
+  */
+ private void importDoc() {
+     if (tmx_xml == null)
+         return;
+     Element root = tmx_xml.getDocumentElement();
+     if (root == null)
+         return;
+     // Locate the body element among the direct children of the root. (The original also
+     // searched for the header first, but never used the result.)
+     Node node = root.getFirstChild();
+     while (node != null && (node.getNodeType() != Node.ELEMENT_NODE || !(node.getNodeName().equalsIgnoreCase("body")))) {
+         node = node.getNextSibling();
+     }
+     if (node == null)
+         return; // no body: nothing to import (this used to fail with a NullPointerException)
+     Element body = (Element)node;
+     resolveEncodings(getEncodingsVector(body));
+
+     // Now do the actual import resource by resource
+     NodeList tu_list = body.getElementsByTagName("tu");
+     for (int i=0; i < tu_list.getLength(); i++) {
+         Element tu_elem = (Element)tu_list.item(i);
+         // Get the key value
+         String name = tu_elem.getAttribute("tuid");
+         if (name == null || name.length() < 1)
+             continue;
+         // Get the group if it exists; the last x-Group prop with text wins
+         String group = null;
+         NodeList prop_list = tu_elem.getElementsByTagName("prop");
+         for (int j=0; j < prop_list.getLength(); j++) {
+             Element prop_elem = (Element)prop_list.item(j);
+             if ("x-Group".equals(prop_elem.getAttribute("type"))) {
+                 String text = firstTextValueOf(prop_elem);
+                 if (text != null)
+                     group = text;
+             }
+         }
+         if (group == null || group.length() < 1) group = getDefaultGroup();
+
+         NodeList tuv_list = tu_elem.getElementsByTagName("tuv");
+         // For each tuv element
+         for (int j=0; j < tuv_list.getLength(); j++) {
+             Element tuv_elem = (Element)tuv_list.item(j);
+             // Get the current encoding
+             String encoding = tuv_elem.getAttribute("lang");
+             if (encoding == null) continue;
+             encoding = encoding.replace('-', '_');
+             // Get the translation value
+             NodeList seg_list = tuv_elem.getElementsByTagName("seg");
+             if (seg_list.getLength() < 1)
+                 continue;
+             String value = firstTextValueOf((Element)seg_list.item(0));
+             if (value == null || value.length() < 1)
+                 continue;
+             // Create the bundle item
+             BundleItem item = new BundleItem(null, name, value);
+             // Get creation, modification values
+             item.setCreatedDate(tuv_elem.getAttribute("creationdate"));
+             item.setModifiedDate(tuv_elem.getAttribute("changedate"));
+             if (tuv_elem.getAttribute("changeid") != null)
+                 item.setModifier(tuv_elem.getAttribute("changeid"));
+             if (tuv_elem.getAttribute("creationid") != null)
+                 item.setCreator(tuv_elem.getAttribute("creationid"));
+             // Get properties specified
+             NodeList tuv_props = tuv_elem.getElementsByTagName("prop");
+             Hashtable lookups = null;
+             for (int k=0; k < tuv_props.getLength(); k++) {
+                 Element prop_elem = (Element)tuv_props.item(k);
+                 String type = prop_elem.getAttribute("type");
+                 if ("x-Comment".equals(type)) {
+                     // Get the comment
+                     String comment = firstTextValueOf(prop_elem);
+                     if (comment != null && comment.length() > 0)
+                         item.setComment(comment);
+                 } else if ("x-Translated".equals(type)) {
+                     // Get the translated flag value; a prop without text leaves it untouched
+                     String flag = firstTextValueOf(prop_elem);
+                     if (flag == null)
+                         continue;
+                     if (flag.equalsIgnoreCase("true"))
+                         item.setTranslated(true);
+                     else if (flag.equalsIgnoreCase("false"))
+                         item.setTranslated(false);
+                     else
+                         item.setTranslated(getDefaultTranslated());
+                 } else if ("x-Lookup".equals(type)) {
+                     // Get a lookup value of the form key=value. (The original reset the
+                     // translated flag here when the text was null, a copy-paste leftover
+                     // from the branch above.)
+                     String text = firstTextValueOf(prop_elem);
+                     int eq = (text == null) ? -1 : text.indexOf('=');
+                     if (eq > 0) {
+                         if (lookups == null) lookups = new Hashtable();
+                         lookups.put(text.substring(0, eq), text.substring(eq + 1));
+                     }
+                 }
+             }
+             if (lookups != null) item.setLookups(lookups);
+             importResource(item, encoding, group);
+         }
+     }
+ }
+
+ /**
+  * Returns the value of the first child of an element when that child is a text node
+  * (adjacent text nodes are merged first), or null when the element has no child or its
+  * first child is not text.
+  */
+ private String firstTextValueOf(Element elem) {
+     elem.normalize();
+     Node first = elem.getFirstChild();
+     if (!(first instanceof Text))
+         return null;
+     return first.getNodeValue();
+ }
+
+ /**
+  * Collects the distinct encodings used by the "tuv" elements of the document body.
+  * An encoding is the "lang" attribute of a "tuv" element with every '-' replaced by '_'.
+  *
+  * @param body the body element of the TMX document
+  * @return a Vector of the distinct encoding Strings, or null when body is null
+  */
+ private Vector getEncodingsVector(Element body) {
+     if (body == null)
+         return null;
+     // The table is used as a set; only its keys matter.
+     Hashtable seen = new Hashtable();
+     NodeList tu_list = body.getElementsByTagName("tu");
+     for (int i=0; i < tu_list.getLength(); i++) {
+         Element tu_elem = (Element)tu_list.item(i);
+         NodeList tuv_list = tu_elem.getElementsByTagName("tuv");
+         for (int j=0; j < tuv_list.getLength(); j++) {
+             Element tuv_elem = (Element)tuv_list.item(j);
+             String encoding = tuv_elem.getAttribute("lang");
+             if (encoding == null)
+                 continue;
+             encoding = encoding.replace('-', '_');
+             if (!(seen.containsKey(encoding)))
+                 seen.put(encoding, "");
+         }
+     }
+     Vector v = new Vector();
+     // "enum" is a reserved word since Java 5, hence the different variable name.
+     for (Enumeration keys = seen.keys(); keys.hasMoreElements();) {
+         v.addElement(keys.nextElement());
+     }
+     return v;
+ }
+}
\ No newline at end of file
diff --git a/unicodetools/com/ibm/rbm/RBxliffExporter.java b/unicodetools/com/ibm/rbm/RBxliffExporter.java
new file mode 100644
index 0000000..87e7b6b
--- /dev/null
+++ b/unicodetools/com/ibm/rbm/RBxliffExporter.java
@@ -0,0 +1,388 @@
+/*
+ *****************************************************************************
+ * Copyright (C) 2000-2004, International Business Machines Corporation and *
+ * others. All Rights Reserved. *
+ *****************************************************************************
+ */
+package com.ibm.rbm;
+
+import java.io.*;
+import java.util.*;
+import java.text.*;
+
+import javax.swing.*;
+import javax.xml.parsers.*;
+import javax.xml.transform.*;
+import javax.xml.transform.dom.*;
+import javax.xml.transform.stream.*;
+
+import org.w3c.dom.*;
+
+/**
+ * This class is a plug-in to RBManager that allows the user to export Resource Bundles,
+ * along with some of the meta-data RBManager associates with them, in the XLIFF format.
+ * For more information on XLIFF visit the web site
+ * <a href="http://www.lisa.org/xliff/">http://www.lisa.org/xliff/</a>
+ *
+ * @author George Rhoten
+ * @see com.ibm.rbm.RBManager
+ */
+public class RBxliffExporter extends RBExporter {
+ private static final String VERSION = "0.7";
+ private static final String XLIFF_DTD = "http://www.oasis-open.org/committees/xliff/documents/xliff.dtd";
+ private static final String XLIFF_PUBLIC_NAME = "-//XLIFF//DTD XLIFF//EN";
+ private SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
+
+ /**
+  * Creates the XLIFF exporter. When no file chooser has been set up yet, one
+  * is created whose filter accepts directories and files ending in ".xlf".
+  */
+ public RBxliffExporter() {
+     super();
+
+     // A chooser already exists, so there is nothing left to initialize.
+     if (chooser != null) {
+         return;
+     }
+
+     javax.swing.filechooser.FileFilter xliffFilter = new javax.swing.filechooser.FileFilter() {
+         public boolean accept(File candidate) {
+             if (candidate.isDirectory()) {
+                 return true;
+             }
+             return candidate.getName().endsWith(".xlf");
+         }
+         public String getDescription() {
+             return "XLIFF Files";
+         }
+     };
+     chooser = new JFileChooser();
+     chooser.setFileFilter(xliffFilter);
+ }
+
+ /**
+  * Formats a date by loading it into a fresh GregorianCalendar and handing
+  * that calendar to the calendar-based overload.
+  *
+  * @param d the date to format
+  * @return the formatted date text
+  */
+ private String convertToISO(Date d) {
+     GregorianCalendar calendar = new GregorianCalendar();
+     calendar.setTime(d);
+     String formatted = convertToISO(calendar);
+     return formatted;
+ }
+
+ /**
+  * Formats the time held by <code>gc</code> with this exporter's
+  * <code>dateFormat</code>, after installing <code>gc</code> as the calendar
+  * that formatter works with.
+  *
+  * @param gc the calendar whose time is formatted
+  * @return the formatted date text
+  */
+ private String convertToISO(GregorianCalendar gc) {
+     dateFormat.setCalendar(gc);
+     Date instant = gc.getTime();
+     return dateFormat.format(instant);
+ }
+
+ /**
+  * Builds a hyphen-separated locale name from the bundle's language, country
+  * and variant encodings. Components are appended in that order and the name
+  * stops at the first component that is null or empty.
+  *
+  * @param item the bundle whose encodings are read
+  * @return "language", "language-country" or "language-country-variant",
+  *         or the empty string when the language itself is null or empty
+  */
+ private String getLocale(Bundle item) {
+     String language = item.getLanguageEncoding();
+     if (language == null || language.equals("")) {
+         return "";
+     }
+
+     String country = item.getCountryEncoding();
+     if (country == null || country.equals("")) {
+         return language;
+     }
+
+     String variant = item.getVariantEncoding();
+     if (variant == null || variant.equals("")) {
+         return language + "-" + country;
+     }
+
+     return language + "-" + country + "-" + variant;
+ }
+
+ /**
+  * Derives the parent of a hyphen-separated locale name by cutting off the
+  * last hyphen and everything after it.
+  *
+  * @param locale the locale name to shorten
+  * @return the text before the last hyphen, or the empty string when there
+  *         is no hyphen or the only hyphen is the first character
+  */
+ private String getParentLocale(String locale) {
+     int lastHyphen = locale.lastIndexOf('-');
+     return (lastHyphen > 0) ? locale.substring(0, lastHyphen) : "";
+ }
+
+ /**
+  * Builds a "trans-unit" element describing <code>item</code> and appends it
+  * to <code>groupElem</code>.
+  * <p>
+  * The unit carries the item's modified date ("date") and key ("id"). When
+  * <code>parent_item</code> is null the item's text is written as the
+  * "source" element; otherwise the parent's text is the "source" and the
+  * item's text is the "target". A "note" holds the comment, and a
+  * "prop-group" holds the creator, created date and modifier when a creator
+  * or a modifier is recorded.
+  *
+  * @param xml         the document used to create the new nodes
+  * @param groupElem   the element that receives the finished "trans-unit"
+  * @param item        the item being exported
+  * @param parent_item the matching item that supplies the source text, or
+  *                    null when <code>item</code> itself is the source
+  */
+ private void addTransUnit(Document xml, Element groupElem, BundleItem item, BundleItem parent_item) {
+     Element transUnit = xml.createElement("trans-unit");
+     transUnit.setAttribute("date",convertToISO(item.getModifiedDate()));
+     transUnit.setAttribute("id",item.getKey());
+
+     String sourceOrTarget = "target";
+     if (parent_item == null) {
+         sourceOrTarget = "source";
+     }
+     else {
+         Element source = xml.createElement("source");
+         source.setAttribute("xml:space","preserve");
+         source.appendChild(xml.createTextNode(parent_item.getTranslation()));
+         transUnit.appendChild(source);
+     }
+     Element target = xml.createElement(sourceOrTarget);
+     target.setAttribute("xml:space","preserve");
+     // This is different from the translate attribute
+     if (item.isTranslated()) {
+         // TODO Handle the other states in the future.
+         transUnit.setAttribute("state", "translated");
+     }
+     target.appendChild(xml.createTextNode(item.getTranslation()));
+     transUnit.appendChild(target);
+
+     // NOTE(review): the "> 1" tests below skip one-character values as well
+     // as empty ones; kept as-is, confirm whether "> 0" was intended.
+     String comment = item.getComment();
+     if (comment != null && comment.length() > 1) {
+         Element comment_prop = xml.createElement("note");
+         comment_prop.setAttribute("xml:space","preserve");
+         comment_prop.appendChild(xml.createTextNode(comment));
+         transUnit.appendChild(comment_prop);
+     }
+
+     String creator = item.getCreator();
+     String modifier = item.getModifier();
+     boolean hasCreator = creator != null && creator.length() > 1;
+     boolean hasModifier = modifier != null && modifier.length() > 1;
+
+     if (hasCreator || hasModifier) {
+         Element transUnit_prop_group_elem = xml.createElement("prop-group");
+
+         if (hasCreator) {
+             Element creator_prop = xml.createElement("prop");
+             creator_prop.setAttribute("prop-type","creator");
+             creator_prop.appendChild(xml.createTextNode(creator));
+             transUnit_prop_group_elem.appendChild(creator_prop);
+         }
+
+         // Guard on the created date itself, not on the creator: a missing
+         // date can no longer reach convertToISO, and the date is still
+         // written when only a modifier is recorded.
+         if (item.getCreatedDate() != null) {
+             Element created_prop = xml.createElement("prop");
+             created_prop.setAttribute("prop-type","created");
+             created_prop.appendChild(xml.createTextNode(convertToISO(item.getCreatedDate())));
+             transUnit_prop_group_elem.appendChild(created_prop);
+         }
+
+         if (hasModifier) {
+             Element modifier_prop = xml.createElement("prop");
+             modifier_prop.setAttribute("prop-type","modifier");
+             modifier_prop.appendChild(xml.createTextNode(modifier));
+             transUnit_prop_group_elem.appendChild(modifier_prop);
+         }
+
+         transUnit.appendChild(transUnit_prop_group_elem);
+     }
+
+     groupElem.appendChild(transUnit);
+ }
+
+ /**
+  * Asks the user for a destination with the save dialog and then calls
+  * <code>exportFile</code> once for every bundle held by <code>rbm</code>.
+  * Nothing happens when <code>rbm</code> is null or the dialog is not
+  * approved.
+  * <p>
+  * The base name is the selected file's name without a trailing ".xlf"; an
+  * empty name falls back to <code>rbm.getBaseClass()</code>.
+  *
+  * @param rbm the manager whose bundles are exported; may be null
+  * @throws IOException declared for the export step; presumably raised by
+  *                     <code>exportFile</code> -- confirm against that method
+  */
+ public void export(RBManager rbm) throws IOException {
+     if (rbm == null)
+         return;
+     // Open the Save Dialog
+     int ret_val = chooser.showSaveDialog(null);
+     if (ret_val != JFileChooser.APPROVE_OPTION)
+         return;
+     // Retrieve basic file information
+     File file = chooser.getSelectedFile(); // The file(s) we will be working with
+     // getParent() is null for a path with no parent component, and handing
+     // that null to new File(String) throws; resolve the absolute form first.
+     File absolute = file.getAbsoluteFile();
+     File directory = absolute.getParentFile(); // The directory we will be writing to
+     if (directory == null)
+         directory = absolute; // a filesystem root was selected
+     String base_name = file.getName(); // The base name of the files we will write
+     if (base_name == null || base_name.equals(""))
+         base_name = rbm.getBaseClass();
+     if (base_name.endsWith(".xlf"))
+         base_name = base_name.substring(0,base_name.length()-4);
+
+     Vector bundle_v = rbm.getBundles();
+     Enumeration bundleIter = bundle_v.elements();
+     while (bundleIter.hasMoreElements()) {
+         exportFile(rbm, directory, base_name, (Bundle)bundleIter.nextElement());
+     }
+ }
+
+ private void addHeaderProperties(Document xml, Element header, Bundle main_bundle) {
+ if (main_bundle.comment != null && main_bundle.comment.length() > 0) {
+ Element note = xml.createElement("note");
+ header.appendChild(note);
+ note.appendChild(xml.createTextNode(main_bundle.comment));
+ note.setAttribute("xml:space","preserve");
+ }
+ if ((main_bundle.name != null && main_bundle.name.length() > 0)
+ || (main_bundle.manager != null && main_bundle.manager.length() > 0)
+ || (main_bundle.language != null && main_bundle.language.length() > 0)
+ || (main_bundle.country != null && main_bundle.country.length() > 0)
+ || (main_bundle.variant != null && main_bundle.variant.length() > 0))
+ {
+ Element prop_group = xml.createElement("prop-group");
+ header.appendChild(prop_group);
+ if (main_bundle.name != null && main_bundle.name.length() > 0) {
+ Element prop = xml.createElement("prop");