// blob: 1736ad4398efe2f054787f17008fdbd0e0e02e37 [file] [log] [blame]
// © 2019 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
package org.unicode.icu.tool.cldrtoicu;
import static com.google.common.base.Preconditions.checkArgument;
import static java.lang.Integer.parseInt;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.unicode.icu.tool.cldrtoicu.regex.NamedFunction;
import com.google.common.base.Ascii;
import com.google.common.base.CharMatcher;
import com.google.common.collect.ImmutableMap;
/**
* The named functions used by the {@code RegexTransformer} for {@code ldml2icu_supplemental.txt}.
*/
final class IcuFunctions {
/**
 * Turns an ISO date string into a space-separated pair of integers holding the upper and lower
 * 32 bits of the corresponding millisecond epoch value (i.e. {@code "<hi32bits> <low32bits>"}).
 *
 * <p>Both halves are written as <em>signed</em> decimal values, so the low bits value will often
 * appear as a negative number. The high bits value is negative whenever the epoch value itself
 * is negative (i.e. for dates before 1970).
 *
 * <ul>
 *   <li>args[0] = ISO date string (e.g. "2019-05-23")
 *   <li>args[1] = Date field type name ("from"/"start" or "to"/"end")
 * </ul>
 */
static final NamedFunction DATE_FN =
    NamedFunction.create("date", 2, args -> {
        // The field type is resolved before the date is parsed.
        DateFieldType fieldType = DateFieldType.toEnum(args.get(1));
        LocalDate date = LocalDate.parse(args.get(0));
        long epochMillis = fieldType.toEpochMillis(date);
        // Narrowing to int keeps exactly the low 32 bits, so no explicit masking is needed.
        int upperBits = (int) (epochMillis >> 32);
        int lowerBits = (int) epochMillis;
        return upperBits + " " + lowerBits;
    });
// TODO: Improve this documentation (e.g. explain why this is being done).
/**
 * Inserts a '%' character into a numberingSystems description, immediately after the last '/'
 * in the value, or at the very start of the value if it contains no '/'.
 *
 * <p>For example, {@code "a/b/c"} becomes {@code "a/b/%c"} and {@code "abc"} becomes
 * {@code "%abc"}.
 *
 * <ul>
 *   <li>args[0] = numbering system description (string)
 * </ul>
 */
static final NamedFunction ALGORITHM_FN =
    NamedFunction.create("algorithm", 1, args -> {
        String description = args.get(0);
        // lastIndexOf() yields -1 when there is no '/', which makes the insertion point 0.
        int insertAt = description.lastIndexOf('/') + 1;
        return new StringBuilder(description).insert(insertAt, '%').toString();
    });
/**
 * Converts a number into a special integer that represents the number in normalized scientific
 * notation for ICU's RB parser.
 *
 * <p>Resultant integers are in the form "xxyyyyyy", where "xx" is the exponent offset by 50
 * and "yyyyyy" is the coefficient to 5 decimal places. Results may also have a leading '-' to
 * denote negative values. Zero is special-cased and always produces "0".
 *
 * <p>For example:
 * <pre>{@code
 * 14660000000000 -> 1.466E13    -> 63146600
 * 0.0001         -> 1E-4        -> 46100000
 * -123.456       -> -1.23456E2  -> -52123456
 * }</pre>
 *
 * <p>The additional exponent offset is applied directly to the calculated exponent and is used
 * to do things like converting percentages into their decimal representation (i.e. by passing
 * a value of "-2").
 *
 * <p>The function throws {@link IllegalArgumentException} if the number is NaN or infinite, or
 * if the final (offset) exponent falls outside the range 0 to 99.
 *
 * <ul>
 *   <li>args[0] = number to be converted (double)
 *   <li>args[1] = additional exponent offset (integer)
 * </ul>
 */
static final NamedFunction EXP_FN =
    NamedFunction.create("exp", 2, args -> {
        double value = Double.parseDouble(args.get(0));
        // NaN has no scientific representation, and an infinite value would never leave the
        // normalization loop below.
        checkArgument(Double.isFinite(value), "cannot convert non-finite number: %s", args.get(0));
        if (value == 0) {
            return "0";
        }
        int exponent = 50;
        if (args.size() == 2) {
            exponent += Integer.parseInt(args.get(1));
        }
        String sign = value >= 0 ? "" : "-";
        value = Math.abs(value);
        // Normalize the magnitude into the range [1, 10), tracking the power of ten.
        while (value >= 10) {
            value /= 10;
            exponent++;
        }
        while (value < 1) {
            value *= 10;
            exponent--;
        }
        long coefficient = Math.round(value * 100000);
        // Rounding can carry into a seventh digit (e.g. 9.9999996 rounds to 1000000), which
        // would corrupt the "xxyyyyyy" layout, so renormalize before the bounds check.
        if (coefficient >= 1000000) {
            coefficient /= 10;
            exponent++;
        }
        if (exponent < 0 || exponent > 99) {
            throw new IllegalArgumentException("Exponent out of bounds: " + exponent);
        }
        return sign + exponent + coefficient;
    });
// The month and day parts may be one or two digits, and the year may be negative.
private static final Pattern YMD = Pattern.compile("(-?[0-9]+)-([0-9]{1,2})-([0-9]{1,2})");
/**
 * Rewrites an ISO date string (i.e. "YYYY-MM-DD") as an ICU date string, which has the same
 * three fields separated by spaces rather than hyphens. Because functions are expanded before
 * the resulting value is split, the output becomes 3 separate values unless the function call
 * is enclosed in quotes.
 *
 * <p>The year part may be negative in some cases (e.g. "-2165-1-1" for "eras"), which is why
 * the value is matched against a pattern rather than simply split on hyphens.
 *
 * <ul>
 *   <li>args[0] = ISO date string (e.g. "2019-05-23" or "-2165-1-1")
 * </ul>
 */
static final NamedFunction YMD_FN =
    NamedFunction.create("ymd", 1, args -> {
        String isoDate = args.get(0);
        Matcher parts = YMD.matcher(isoDate);
        checkArgument(parts.matches(), "invalid year-month-day string: %s", isoDate);
        // NOTE: Round-tripping each field through an int is required, since it strips the
        // leading zeros (needed for ICU).
        int year = parseInt(parts.group(1));
        int month = parseInt(parts.group(2));
        int day = parseInt(parts.group(3));
        return year + " " + month + " " + day;
    });
// For transforming day-of-week identifiers. Tuesday is accepted both as "tue" (the identifier
// used for days in CLDR week data) and as the older "tues" spelling, which is kept so that any
// existing input continues to work.
private static final ImmutableMap<String, String> WEEKDAY_MAP_ID =
    ImmutableMap.<String, String>builder()
        .put("sun", "1")
        .put("mon", "2")
        .put("tue", "3")
        .put("tues", "3")
        .put("wed", "4")
        .put("thu", "5")
        .put("fri", "6")
        .put("sat", "7")
        .build();
/**
 * Converts a day-of-week identifier into its ordinal value (e.g. "sun" --> 1, "mon" --> 2 ...).
 *
 * <p>The function throws {@link IllegalArgumentException} if the identifier is not one of the
 * known weekday identifiers.
 *
 * <ul>
 *   <li>args[0] = day-of-week identifier (e.g. "sun")
 * </ul>
 */
static final NamedFunction DAY_NUMBER_FN =
    NamedFunction.create("day_number", 1,
        args -> {
            String id = WEEKDAY_MAP_ID.get(args.get(0));
            checkArgument(id != null, "unknown weekday: %s", args.get(0));
            return id;
        });
// Maps the transform types found in <contextTransform> elements to their ICU index.
private static final ImmutableMap<String, String> TRANSFORM_ID_MAP =
    ImmutableMap.<String, String>builder()
        .put("no-change", "0")
        .put("titlecase-firstword", "1")
        .build();
/**
 * Looks up the ICU index for the transform type of a {@code <contextTransform>} element
 * (e.g. "no-change" --> 0, "titlecase-firstword" --> 1).
 *
 * <p>The function throws {@link IllegalArgumentException} for any other transform type.
 *
 * <ul>
 *   <li>args[0] = transform type (e.g. "titlecase-firstword")
 * </ul>
 */
static final NamedFunction CONTEXT_TRANSFORM_INDEX_FN =
    NamedFunction.create("context_transform_index", 1, args -> {
        String transformType = args.get(0);
        String index = TRANSFORM_ID_MAP.get(transformType);
        checkArgument(index != null, "unknown contextTransform: %s", transformType);
        return index;
    });
// Used by DATE_FN only: selects which instant within a day a date stands for.
private enum DateFieldType {
    /** Start of a range: the first instant of the given day. */
    from(day -> day.atStartOfDay()),
    /** End of a range: the last millisecond of the given day. */
    // Note that the final argument of atTime() is in nanoseconds, not micro or milli.
    to(day -> day.atTime(23, 59, 59, 999_000_000));

    private final Function<LocalDate, LocalDateTime> toDateTime;

    DateFieldType(Function<LocalDate, LocalDateTime> toDateTime) {
        this.toDateTime = toDateTime;
    }

    /** Returns the millisecond epoch value, in UTC, of the adjusted date-time for the date. */
    long toEpochMillis(LocalDate date) {
        LocalDateTime adjusted = toDateTime.apply(date);
        return adjusted.toInstant(ZoneOffset.UTC).toEpochMilli();
    }

    /**
     * Resolves a field type name, ignoring ASCII case and surrounding whitespace. The names
     * "from" and "start" map to {@link #from}, while "to" and "end" map to {@link #to}.
     *
     * @throws IllegalArgumentException if the name is not one of the accepted values.
     */
    static DateFieldType toEnum(String value) {
        String name = Ascii.toLowerCase(CharMatcher.whitespace().trimFrom(value));
        if (name.equals("from") || name.equals("start")) {
            return from;
        }
        if (name.equals("to") || name.equals("end")) {
            return to;
        }
        throw new IllegalArgumentException(value + " is not a valid date field type");
    }
}
// Private constructor: this class only holds static members and is never instantiated.
private IcuFunctions() {}
}