tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/RbPath.java - external/github.com/unicode-org/icu - Git at Google

 // © 2019 and later: Unicode, Inc. and others.
 // License & terms of use: http://www.unicode.org/copyright.html
 package org.unicode.icu.tool.cldrtoicu;

 import static com.google.common.base.CharMatcher.whitespace;
 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkState;
 import static com.google.common.collect.ImmutableList.toImmutableList;

 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.Objects;
 import java.util.function.Function;

 import com.google.common.base.CharMatcher;
 import com.google.common.base.Splitter;
 import com.google.common.collect.Comparators;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Iterables;

 /**
  * A resource bundle path, used to identify entries in ICU data.
  *
  * <p>A path is an ordered sequence of non-empty segments. Each segment is validated when the path
  * is created and must be one of:
  * <ul>
  *   <li>a hidden label of the form {@code <...>}, whose content is not validated;
  *   <li>a quoted segment of the form {@code "..."}, whose content may contain whitespace but no
  *       further double quotes;
  *   <li>an unquoted segment, which may contain neither whitespace nor double quotes.
  * </ul>
  *
  * <p>Immutable and thread safe.
  */
 public final class RbPath implements Comparable<RbPath> {
     // Note that results are trimmed, so whitespace around '/' is ignored when parsing.
     private static final Splitter PATH_SPLITTER = Splitter.on('/').trimResults();

     // This defines ordering of paths in IcuData instances and thus the order in ICU data files.
     // If there's ever a reason to have a different "natural" order for paths, this Comparator
     // should be moved into the ICU file writer class(es).
     private static final Comparator<RbPath> ORDERING =
         Comparator.comparing(
             p -> p.segments,
             Comparators.lexicographical(Comparator.<String>naturalOrder()));

     // Matches the definition of invariant characters in "uinvchar.cpp". We can make this all much
     // faster if needed with a custom matcher (it's just a 128 way bit lookup via 2 longs).
     private static final CharMatcher INVARIANT_CHARS =
         CharMatcher.ascii().and(CharMatcher.anyOf("!#$@[\\]^`{|}~").negate());

     // Note that we must also prohibit double-quote from appearing anywhere other than surrounding
     // segment values. This is because some segment values can contain special ICU data characters
     // (e.g. ':') but must be treated as literals. There is no proper "escaping" mechanism in ICU
     // data for key values (since '\' is not an invariant, things like \\uxxxx are not possible).
     //
     // Ideally quoting would be done when the file is written, but that would require additional
     // complexity in RbPath, since suffixes like ":intvector" must not be quoted and must somehow
     // be distinguished from timezone "metazone" names which also contain ':'.
     private static final CharMatcher QUOTED_SEGMENT_CHARS =
         INVARIANT_CHARS
             .and(CharMatcher.javaIsoControl().negate())
             .and(CharMatcher.isNot('"'));
     private static final CharMatcher UNQUOTED_SEGMENT_CHARS =
         QUOTED_SEGMENT_CHARS.and(whitespace().negate());

     /**
      * Returns a path with the specified (possibly empty) sequence of segments. Note that unlike
      * {@link #parse(String)}, {@code '/'} is not treated specially and can be present in a path
      * element constructed by this method.
      *
      * @param segments the segments of the new path, in order.
      * @throws IllegalArgumentException if any segment is empty, mismatched in its quoting, or
      *     contains a disallowed character.
      */
     public static RbPath of(String... segments) {
         return of(Arrays.asList(segments));
     }

     /**
      * Returns a path with the specified (possibly empty) sequence of segments. Note that unlike
      * {@link #parse(String)}, {@code '/'} is not treated specially and can be present in a path
      * element constructed by this method.
      *
      * @param segments the segments of the new path, in order.
      * @throws IllegalArgumentException if any segment is empty, mismatched in its quoting, or
      *     contains a disallowed character.
      */
     public static RbPath of(Iterable<String> segments) {
         return new RbPath(segments);
     }

     /**
      * Parses the given path string, assuming {@code '/'} as a path separator. A single leading
      * {@code '/'} is ignored and whitespace around each segment is trimmed.
      *
      * @param path the non-empty path string to parse.
      * @throws IllegalArgumentException if the string is empty or any resulting segment is invalid
      *     (including empty segments, e.g. from {@code "a//b"}).
      */
     public static RbPath parse(String path) {
         checkArgument(!path.isEmpty(), "cannot parse an empty path string");
         // Allow leading '/', but don't allow empty segments anywhere else.
         if (path.startsWith("/")) {
             path = path.substring(1);
         }
         return new RbPath(PATH_SPLITTER.split(path));
     }

     /** Returns the common prefix length of two paths (useful when thinking of path hierarchies). */
     public static int getCommonPrefixLength(RbPath lhs, RbPath rhs) {
         int maxLength = Math.min(lhs.length(), rhs.length());
         int n = 0;
         while (n < maxLength && lhs.getSegment(n).equals(rhs.getSegment(n))) {
             n++;
         }
         return n;
     }

     private final ImmutableList<String> segments;
     // Cached, since the segment list never changes after construction.
     private final int hashCode;

     // Copies and validates the given segments (all instances are created via this constructor).
     private RbPath(Iterable<String> segments) {
         this.segments = ImmutableList.copyOf(segments);
         // Use "this.segments" since the incoming list can have a different hash!
         this.hashCode = Objects.hash(this.segments);
         for (String segment : this.segments) {
             checkArgument(!segment.isEmpty(), "path segments must not be empty: %s", this.segments);
             checkSegment(segment);
         }
     }

     // Validates a single non-empty segment according to its leading character.
     private static void checkSegment(String segment) {
         // Either the label is quoted (e.g. "foo") or it is bare (e.g. foo) but it can only
         // contain double quotes at either end, or not at all. If the string is quoted, only
         // validate the content, and not the quotes themselves.
         switch (segment.charAt(0)) {
         case '<':
             // Allow anything in hidden labels, since they will be removed later and never
             // appear in the final ICU data.
             checkArgument(segment.endsWith(">"),
                 "mismatched quoting for hidden label: %s", segment);
             break;

         case '"':
             // The length check rejects a lone double-quote, which would otherwise satisfy
             // endsWith() and then fail in substring() with an index exception rather than
             // the expected IllegalArgumentException.
             checkArgument(segment.length() > 1 && segment.endsWith("\""),
                 "mismatched quoting for segment: %s", segment);
             checkArgument(
                 QUOTED_SEGMENT_CHARS.matchesAllOf(segment.substring(1, segment.length() - 1)),
                 "invalid character in quoted resource bundle path segment: %s", segment);
             break;

         default:
             checkArgument(
                 UNQUOTED_SEGMENT_CHARS.matchesAllOf(segment),
                 "invalid character in unquoted resource bundle path segment: %s", segment);
             break;
         }
     }

     /** Returns the number of segments in this path. */
     public int length() {
         return segments.size();
     }

     /** Returns the Nth segment in this path (zero-indexed). */
     public String getSegment(int n) {
         return segments.get(n);
     }

     /** Returns a new path extended at the end by the specified segments. */
     public RbPath extendBy(String... parts) {
         return new RbPath(Iterables.concat(segments, Arrays.asList(parts)));
     }

     /** Returns whether this path starts with the specified prefix. */
     public boolean startsWith(RbPath prefix) {
         return prefix.length() <= length() && matchesSublist(prefix, 0);
     }

     /** Returns whether this path ends with the specified suffix. */
     public boolean endsWith(RbPath suffix) {
         return suffix.length() <= length() && matchesSublist(suffix, length() - suffix.length());
     }

     /** Returns whether this path contains the specified path as a contiguous run of segments. */
     public boolean contains(RbPath path) {
         // A negative maxOffset (path longer than this) means the loop body never runs.
         int maxOffset = length() - path.length();
         for (int i = 0; i <= maxOffset; i++) {
             if (matchesSublist(path, i)) {
                 return true;
             }
         }
         return false;
     }

     // Assume length check has been done.
     private boolean matchesSublist(RbPath path, int offset) {
         for (int i = 0; i < path.length(); i++) {
             if (!path.getSegment(i).equals(getSegment(i + offset))) {
                 return false;
             }
         }
         return true;
     }

     // TODO: Remove this and isAnonymous() since they are only called once each, in the same place.
     /**
      * Returns this path with its last segment removed.
      *
      * @throws IllegalStateException if this path is empty.
      */
     public RbPath getParent() {
         checkState(length() > 0, "cannot get parent of the empty path");
         return new RbPath(segments.subList(0, length() - 1));
     }

     /** Returns whether the last segment of this path is a hidden label (i.e. starts with '<'). */
     public boolean isAnonymous() {
         return length() > 0 && segments.get(length() - 1).charAt(0) == '<';
     }

     // TODO: Remove this special case code (called exactly once).
     /**
      * Returns a new path obtained by applying the given function to each segment in order. The
      * mapped segments are validated in the same way as for any other path.
      */
     public RbPath mapSegments(Function<? super String, String> fn) {
         return new RbPath(segments.stream().map(fn).collect(toImmutableList()));
     }

     // TODO: Remove this in favour of having properly typed paths.
     // Returns whether the last segment has an ":int" or ":intvector" suffix. The empty path has no
     // last segment and so is never an "int" path (this mirrors the guard in isAnonymous()).
     boolean isIntPath() {
         if (segments.isEmpty()) {
             return false;
         }
         String lastElement = segments.get(segments.size() - 1);
         return lastElement.endsWith(":int") || lastElement.endsWith(":intvector");
     }

     @Override public int compareTo(RbPath other) {
         return ORDERING.compare(this, other);
     }

     @Override public boolean equals(Object other) {
         return (other instanceof RbPath) && segments.equals(((RbPath) other).segments);
     }

     @Override public int hashCode() {
         return hashCode;
     }

     /** Returns the segments of this path joined by {@code '/'}. */
     @Override public String toString() {
         return String.join("/", segments);
     }
 }
	// © 2019 and later: Unicode, Inc. and others.
	// License & terms of use: http://www.unicode.org/copyright.html
	package org.unicode.icu.tool.cldrtoicu;

	import static com.google.common.base.CharMatcher.whitespace;
	import static com.google.common.base.Preconditions.checkArgument;
	import static com.google.common.base.Preconditions.checkState;
	import static com.google.common.collect.ImmutableList.toImmutableList;

	import java.util.Arrays;
	import java.util.Comparator;
	import java.util.Objects;
	import java.util.function.Function;

	import com.google.common.base.CharMatcher;
	import com.google.common.base.Splitter;
	import com.google.common.collect.Comparators;
	import com.google.common.collect.ImmutableList;
	import com.google.common.collect.Iterables;

	/**
	* A resource bundle path, used to identify entries in ICU data.
	*
	* <p>Immutable and thread safe.
	*/
	public final class RbPath implements Comparable<RbPath> {
	private static final Splitter PATH_SPLITTER = Splitter.on('/').trimResults();

	// This defines ordering of paths in IcuData instances and thus the order in ICU data files.
	// If there's ever a reason to have a different "natural" order for paths, this Comparator
	// should be moved into the ICU file writer class(es).
	private static final Comparator<RbPath> ORDERING =
	Comparator.comparing(
	p -> p.segments,
	Comparators.lexicographical(Comparator.<String>naturalOrder()));

	// Matches the definition of invariant characters in "uinvchar.cpp". We can make this all much
	// faster if needed with a custom matcher (it's just a 128 way bit lookup via 2 longs).
	private static final CharMatcher INVARIANT_CHARS =
	CharMatcher.ascii().and(CharMatcher.anyOf("!#$@[\\]^`{\|}~").negate());

	// Note that we must also prohibit double-quote from appearing anywhere other than surrounding
	// segment values. This is because some segment values can contain special ICU data characters
	// (e.g. ':') but must be treated as literals. There is not proper "escaping" mechanism in ICU
	// data for key values (since '\' is not an invariant, things like \\uxxxx are not possible).
	//
	// Ideally quoting would be done when the file is written, but that would require additional
	// complexity in RbPath, since suffixes like ":intvector" must not be quoted and must somehow
	// be distinguished from timezone "metazone" names which also contain ':'.
	private static final CharMatcher QUOTED_SEGMENT_CHARS =
	INVARIANT_CHARS
	.and(CharMatcher.javaIsoControl().negate())
	.and(CharMatcher.isNot('"'));
	private static final CharMatcher UNQUOTED_SEGMENT_CHARS =
	QUOTED_SEGMENT_CHARS.and(whitespace().negate());

	/**
	* Returns a path with the specified segments in (possibly empty). Note that unlike
	* {@link #parse(String)}, {@code '/'} is not treated specially and can be present in a path
	* element constructed by this method.
	*/
	public static RbPath of(String... segments) {
	return of(Arrays.asList(segments));
	}

	/**
	* Returns a path with the specified segments in (possibly empty). Note that unlike
	* {@link #parse(String)}, {@code '/'} is not treated specially and can be present in a path
	* element constructed by this method.
	*/
	public static RbPath of(Iterable<String> segments) {
	return new RbPath(segments);
	}

	/** Parses the given path string, assuming {@code '/'} as a path separator. */
	public static RbPath parse(String path) {
	checkArgument(!path.isEmpty(), "cannot parse an empty path string");
	// Allow leading '/', but don't allow empty segments anywhere else.
	if (path.startsWith("/")) {
	path = path.substring(1);
	}
	return new RbPath(PATH_SPLITTER.split(path));
	}

	/** Returns the common prefix length of two paths (useful when thinking of path hierarchies). */
	public static int getCommonPrefixLength(RbPath lhs, RbPath rhs) {
	int maxLength = Math.min(lhs.length(), rhs.length());
	int n = 0;
	while (n < maxLength && lhs.getSegment(n).equals(rhs.getSegment(n))) {
	n++;
	}
	return n;
	}

	private final ImmutableList<String> segments;
	private final int hashCode;

	private RbPath(Iterable<String> segments) {
	this.segments = ImmutableList.copyOf(segments);
	// Use "this.segments" since the incoming list can have a different hash!
	this.hashCode = Objects.hash(this.segments);
	for (String segment : this.segments) {
	checkArgument(!segment.isEmpty(), "path segments must not be empty: %s", this.segments);
	// Either the label is quoted (e.g. "foo") or it is bar (e.g. foo) but it can only
	// contain double quotes at either end, or not at all. If the string is quoted, only
	// validate the content, and not the quotes themselves.
	switch (segment.charAt(0)) {
	case '<':
	// Allow anything in hidden labels, since they will be removed later and never
	// appear in the final ICU data.
	checkArgument(segment.endsWith(">"),
	"mismatched quoting for hidden label: %s", segment);
	continue;

	case '"':
	checkArgument(segment.endsWith("\""),
	"mismatched quoting for segment: %s", segment);
	checkArgument(
	QUOTED_SEGMENT_CHARS.matchesAllOf(segment.substring(1, segment.length() - 1)),
	"invalid character in unquoted resource bundle path segment: %s", segment);
	break;

	default:
	checkArgument(
	UNQUOTED_SEGMENT_CHARS.matchesAllOf(segment),
	"invalid character in unquoted resource bundle path segment: %s", segment);
	break;
	}
	}
	}

	/** Returns the number of segments in this path. */
	public int length() {
	return segments.size();
	}

	/** Returns the Nth segments in this path. */
	public String getSegment(int n) {
	return segments.get(n);
	}

	/** Returns a new path extended at the end by the specified segments. */
	public RbPath extendBy(String... parts) {
	return new RbPath(Iterables.concat(segments, Arrays.asList(parts)));
	}

	/** Returns whether this path starts with the specified prefix. */
	public boolean startsWith(RbPath prefix) {
	return prefix.length() <= length() && matchesSublist(prefix, 0);
	}

	/** Returns whether this path ends with the specified suffix. */
	public boolean endsWith(RbPath suffix) {
	return suffix.length() <= length() && matchesSublist(suffix, length() - suffix.length());
	}

	/** Returns whether this path contains the specified path. */
	public boolean contains(RbPath path) {
	int maxOffset = length() - path.length();
	for (int i = 0; i <= maxOffset; i++) {
	if (matchesSublist(path, i)) {
	return true;
	}
	}
	return false;
	}

	// Assume length check has been done.
	private boolean matchesSublist(RbPath path, int offset) {
	for (int i = 0; i < path.length(); i++) {
	if (!path.getSegment(i).equals(getSegment(i + offset))) {
	return false;
	}
	}
	return true;
	}

	// TODO: Remove this and isAnonymous() since they are only called once each, in the same place.
	public RbPath getParent() {
	checkState(length() > 0, "cannot get parent of the empty path");
	return new RbPath(segments.subList(0, length() - 1));
	}

	public boolean isAnonymous() {
	return length() > 0 && segments.get(length() - 1).charAt(0) == '<';
	}

	// TODO: Remove this special case code (called exactly once).
	public RbPath mapSegments(Function<? super String, String> fn) {
	return new RbPath(segments.stream().map(fn).collect(toImmutableList()));
	}

	// TODO: Remove this in favour of having properly typed paths.
	boolean isIntPath() {
	String lastElement = segments.get(segments.size() - 1);
	return lastElement.endsWith(":int") \|\| lastElement.endsWith(":intvector");
	}

	@Override public int compareTo(RbPath other) {
	return ORDERING.compare(this, other);
	}

	@Override public boolean equals(Object other) {
	return (other instanceof RbPath) && segments.equals(((RbPath) other).segments);
	}

	@Override public int hashCode() {
	return hashCode;
	}

	@Override public String toString() {
	return String.join("/", segments);
	}
	}