ICU-21237 Improving how file deletion and cleanup works by adding a build label.
See #1246
diff --git a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/CleanOutputDirectoryTask.java b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/CleanOutputDirectoryTask.java
index 382f76d..b2ef028 100644
--- a/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/CleanOutputDirectoryTask.java
+++ b/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/ant/CleanOutputDirectoryTask.java
@@ -50,6 +50,21 @@
private static final CharMatcher NOT_WHITESPACE = CharMatcher.whitespace().negate();
+ private static final String HEADER_FILE = "ldml2icu_header.txt";
+
+ // If present in the header of a file, this line is used to determine that the file was
+ // auto-generated. This allows us to change the rest of the header freely without issue.
+ // However if it's not present in the file, we fallback to comparing the rest of the
+ // header without it (since that's the old behaviour).
+ // Once there's been an ICU release with this line included in the headers of all data
+ // files, we can remove the fallback and just test for this line and nothing else.
+ private static final String WAS_GENERATED_LABEL =
+ "Generated using tools/cldr/cldr-to-icu/build-icu-data.xml";
+
+ // The number of header lines to check before giving up if we don't find the generated
+ // label.
+ private static final int MAX_HEADER_CHECK_LINES = 20;
+
private Path root = null;
private boolean forceDelete = false;
private final List<Dir> outputDirs = new ArrayList<>();
@@ -57,7 +72,13 @@
public CleanOutputDirectoryTask() {
// TODO: Consider passing in header lines via Ant?
- this.headerLines = readLinesFromResource("/ldml2icu_header.txt");
+ this.headerLines = readLinesFromResource("/" + HEADER_FILE);
+ // For now assume that the generated label is the last line of the header.
+ checkState(Iterables.getLast(headerLines).equals(WAS_GENERATED_LABEL),
+ "Expected last line of %s header file to be:\n\t%s", HEADER_FILE, WAS_GENERATED_LABEL);
+ // Make sure we check at least a few more lines than is in the current header.
+ checkState(MAX_HEADER_CHECK_LINES >= headerLines.size() + 5,
+ "Unexpectedly large header file; please increase MAX_HEADER_CHECK_LINES constant");
}
public static final class Retain extends Task {
@@ -214,31 +235,69 @@
return false;
}
try (BufferedReader r = Files.newBufferedReader(path, UTF_8)) {
- // A byte-order-mark (BOM) is added to ICU data files, but not JSON deps files, so just
- // treat it as optional everywhere (it's not the important thing we check here).
- r.mark(1);
- int maybeByteOrderMark = r.read();
- if (maybeByteOrderMark != '\uFEFF') {
- // Also reset if the file was empty, but that should be harmless.
- r.reset();
- }
- for (String headerLine : headerLines) {
- String line = r.readLine();
- if (line == null) {
- return false;
- }
- int headerStart = skipComment(line);
- if (headerStart < 0
- || !line.regionMatches(headerStart, headerLine, 0, headerLine.length())) {
- return false;
- }
- }
- return true;
+ return wasFileAutoGenerated(r, headerLines);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
+ // TODO: Once the WAS_GENERATED_LABEL is in all auto-generated ICU data files, simplify this.
+ // Static and non-private for testing.
+ static boolean wasFileAutoGenerated(BufferedReader fileReader, ImmutableList<String> headerLines)
+ throws IOException {
+ // A byte-order-mark (BOM) is added to ICU data files, but not JSON deps files, so just
+ // treat it as optional everywhere (it's not the important thing we check here).
+ fileReader.mark(1);
+ int maybeByteOrderMark = fileReader.read();
+ if (maybeByteOrderMark != '\uFEFF') {
+ // Also reset if the file was empty, but that should be harmless.
+ fileReader.reset();
+ }
+ boolean isLenientHeaderMatchSoFar = true;
+ for (int n = 0; n < MAX_HEADER_CHECK_LINES ; n++) {
+ String line = fileReader.readLine();
+ // True if we have processed the header, not including the trailing generated label.
+ boolean headerIsProcessed = n >= headerLines.size() - 1;
+ boolean isCompleteLenientMatch = isLenientHeaderMatchSoFar && headerIsProcessed;
+ if (line == null) {
+ // We ran off the end of the file, so we're done.
+ return isCompleteLenientMatch;
+ }
+ int headerStart = skipComment(line);
+ if (headerStart < 0) {
+ // We ran off the end of the expected comment section, so we're done.
+ return isCompleteLenientMatch;
+ }
+ if (matchesToEndOfLine(line, headerStart, WAS_GENERATED_LABEL)) {
+ // Finding the generated label trumps any lenient matching.
+ return true;
+ }
+ if (!isLenientHeaderMatchSoFar) {
+ // We already failed at lenient matching, so keep going in the hope of finding
+ // the generated label (we don't need to check the header any more).
+ continue;
+ }
+ if (headerIsProcessed) {
+ // We finishing processing the header and it matched (not including the
+ // generated label) but for older data files, that's fine.
+ return true;
+ }
+ // Check the next header line (not including the trailing generated label).
+ isLenientHeaderMatchSoFar = matchesToEndOfLine(line, headerStart, headerLines.get(n));
+ }
+ // This is actually an unusual case. It corresponds to a file which:
+ // * has a leading comment section at least as long as MAX_HEADER_CHECK_LINES
+ // * does not contain WAS_GENERATED_LABEL anywhere in the first MAX_HEADER_CHECK_LINES
+ // Most files checked are expected to match, and so will not get here, but instead
+ // return via one of the return statements above.
+ return false;
+ }
+
+ private static boolean matchesToEndOfLine(String line, int start, String expected) {
+ return line.length() - start == expected.length()
+ && line.regionMatches(start, expected, 0, expected.length());
+ }
+
private static int skipComment(String line) {
if (line.startsWith("#")) {
return toCommentStart(line, 1);
diff --git a/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_header.txt b/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_header.txt
index 70f6b83..48c27a0 100644
--- a/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_header.txt
+++ b/tools/cldr/cldr-to-icu/src/main/resources/ldml2icu_header.txt
@@ -1,2 +1,3 @@
© 2016 and later: Unicode, Inc. and others.
License & terms of use: http://www.unicode.org/copyright.html#License
+Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
diff --git a/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/ant/CleanOutputDirectoryTaskTest.java b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/ant/CleanOutputDirectoryTaskTest.java
new file mode 100644
index 0000000..bc8d854
--- /dev/null
+++ b/tools/cldr/cldr-to-icu/src/test/java/org/unicode/icu/tool/cldrtoicu/ant/CleanOutputDirectoryTaskTest.java
@@ -0,0 +1,84 @@
+package org.unicode.icu.tool.cldrtoicu.ant;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.truth.BooleanSubject;
+import com.google.common.truth.Truth;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.Arrays;
+
+@RunWith(JUnit4.class)
+public class CleanOutputDirectoryTaskTest {
+ // Not using the original field since we want this test to fail if this changes unexpectedly.
+ private static final String WAS_GENERATED_LABEL =
+ "Generated using tools/cldr/cldr-to-icu/build-icu-data.xml";
+
+ // Commented version of the label for test data.
+ private static final String WAS_GENERATED_LINE = "// " + WAS_GENERATED_LABEL;
+
+ private final static ImmutableList<String> HEADER = ImmutableList.of(
+ "Header#1",
+ "Header#2",
+ WAS_GENERATED_LABEL);
+
+ @Test
+ public void testWasFileAutoGenerated_lenientMatching() throws IOException {
+ // Testing comment prefixes (// or # supported equally)
+ assertWasAutogenerated("// Header#1", "// Header#2").isTrue();
+ assertWasAutogenerated("# Header#1", "# Header#2").isTrue();
+ assertWasAutogenerated("# Header#1", "// Header#2").isTrue();
+ assertWasAutogenerated("// Header#1", "//Header#2").isTrue();
+ // Extra lines
+ assertWasAutogenerated("// Header#1", "// Header#2", "// More comment", "Not a comment").isTrue();
+ // BOM is ignored on first line
+ assertWasAutogenerated("\uFEFF// Header#1", "// Header#2").isTrue();
+ }
+
+ @Test
+ public void testWasFileAutoGenerated_lenientMatching_fail() throws IOException {
+ // New blank line.
+ assertWasAutogenerated("", "// Header#1", "// Header#2").isFalse();
+ // Duplicated line.
+ assertWasAutogenerated("// Header#1", "// Header#1").isFalse();
+ // Reversed lines.
+ assertWasAutogenerated("// Header#2", "// Header#1").isFalse();
+ // Not commented.
+ assertWasAutogenerated("Header#1", "Header#2").isFalse();
+ // Extra blank line.
+ assertWasAutogenerated("// Header#1", "", "// Header#2").isFalse();
+ // Misplaced BOM.
+ assertWasAutogenerated("// Header#1", "\uFEFF// Header#2").isFalse();
+ }
+
+ @Test
+ public void testWasFileAutoGenerated_withLabel() throws IOException {
+ // With the label in the header comment section everything passes.
+ assertWasAutogenerated("// Header#1", "// Header#2", WAS_GENERATED_LINE).isTrue();
+ assertWasAutogenerated("// Hello", "// World", WAS_GENERATED_LINE).isTrue();
+ assertWasAutogenerated("// Shorter Header", WAS_GENERATED_LINE).isTrue();
+ assertWasAutogenerated("// This", "// Is", "// A", "// Longer", "// Header", WAS_GENERATED_LINE).isTrue();
+ assertWasAutogenerated("// Where the label is", WAS_GENERATED_LINE, "// Does not matter").isTrue();
+ assertWasAutogenerated(WAS_GENERATED_LINE).isTrue();
+ }
+
+ @Test
+ public void testWasFileAutoGenerated_matchingEndsAfterComments() throws IOException {
+ assertWasAutogenerated("// Header#1", "// Header#Changed", WAS_GENERATED_LINE).isTrue();
+ // Label outside the header comment section does not count.
+ assertWasAutogenerated("// Header#1", "// Header#Changed", "Not a comment", WAS_GENERATED_LINE).isFalse();
+ }
+
+ private static BooleanSubject assertWasAutogenerated(String... fileLines) throws IOException {
+ return Truth.assertWithMessage("wasAutogenerated: %s", Arrays.asList(fileLines))
+ .that(CleanOutputDirectoryTask.wasFileAutoGenerated(testFile(fileLines), HEADER));
+ }
+
+ private static BufferedReader testFile(String... lines) {
+ return new BufferedReader(new StringReader(String.join("\n", lines) + "\n"));
+ }
+}