ICU-21176 Add aliases for terms "whitelist" and "blacklist" in data filter
See #1189
diff --git a/.ci-builds/data-filter.json b/.ci-builds/data-filter.json
index ffde995..0cfc8ff 100644
--- a/.ci-builds/data-filter.json
+++ b/.ci-builds/data-filter.json
@@ -8,18 +8,29 @@
]
},
// Test mixed feature filter and resource filter
-// Exlude translit data so we can run test for ICU-20673
+// Exclude translit data so we can run test for ICU-20673
+// Also test the "whitelist"/"includelist" and "blacklist"/"excludelist" aliases
"featureFilters": {
"misc": {
"whitelist": ["supplementalData"]
},
- "translit": "exclude"
+ "translit": "exclude",
+ "curr_tree": {
+ "filterType": "locale",
+ "includelist": ["my"]
+ },
+ "brkitr_rules": {
+ "excludelist": ["line"]
+ },
+ "brkitr_dictionaries": {
+ "blacklist": ["cjdict"]
+ }
},
"resourceFilters": [
{
"categories": ["misc"],
"files": {
- "whitelist": ["supplementalData"]
+ "includelist": ["supplementalData"]
},
"rules": ["+/*"]
}
diff --git a/docs/userguide/icu_data/buildtool.md b/docs/userguide/icu_data/buildtool.md
index 7c3eadc..ff03e4f 100644
--- a/docs/userguide/icu_data/buildtool.md
+++ b/docs/userguide/icu_data/buildtool.md
@@ -76,7 +76,7 @@
{
"localeFilter": {
"filterType": "language",
- "whitelist": [
+ "includelist": [
"en",
"de",
"zh"
@@ -86,6 +86,11 @@
The *filterType* "language" only supports slicing by entire languages.
+##### Terminology: Includelist, Excludelist, Whitelist, Blacklist
+
+Prior to ICU 68, use `"whitelist"` and `"blacklist"` instead of `"includelist"`
+and `"excludelist"`, respectively. ICU 68 allows all four terms.
+
#### Filtering by Locale
For more control, use *filterType* "locale". Here is a *filters.hjson* file that
@@ -94,13 +99,15 @@
localeFilter: {
filterType: locale
- whitelist: [
+ includelist: [
en
de
zh
]
}
+*If using ICU 67 or earlier, see note above regarding allowed keywords.*
+
#### Adding Script Variants (includeScripts = true)
You may set the *includeScripts* option to true to include all scripts for a
@@ -112,7 +119,7 @@
"localeFilter": {
"filterType": "locale",
"includeScripts": true,
- "whitelist": [
+ "includelist": [
"en",
"de",
"zh"
@@ -120,6 +127,8 @@
}
}
+*If using ICU 67 or earlier, see note above regarding allowed keywords.*
+
If you wish to explicitly list the scripts, you may put the script code in the
-locale tag in the whitelist, and you do not need the *includeScripts* option
+locale tag in the includelist, and you do not need the *includeScripts* option
enabled. For example, in Hjson, to include Han Traditional ***but not Han
@@ -127,14 +136,16 @@
localeFilter: {
filterType: locale
- whitelist: [
+ includelist: [
en
de
zh_Hant
]
}
-Note: the option *includeScripts* is only supported at the language level;
+*If using ICU 67 or earlier, see note above regarding allowed keywords.*
+
+**Note:** the option *includeScripts* is only supported at the language level;
i.e., in order to include all scripts for a particular language, you must
specify the language alone, without a region tag.
@@ -150,7 +161,7 @@
localeFilter: {
filterType: locale
includeChildren: false
- whitelist: [
+ includelist: [
en_US
en_GB
de_DE
@@ -158,6 +169,8 @@
]
}
+*If using ICU 67 or earlier, see note above regarding allowed keywords.*
+
Including dependencies, the above filter would include the following data files:
- root.txt
@@ -285,7 +298,7 @@
featureFilters: {
brkitr_dictionaries: {
- whitelist: [
+ includelist: [
burmesedict
]
}
@@ -295,7 +308,8 @@
automatically for you. Note that all files in a particular category have the
same directory and extension.
-You can use either a whitelist or a blacklist for the file name filter.
+You can use either `"includelist"` or `"excludelist"` for the file name filter.
+*If using ICU 67 or earlier, see note above regarding allowed keywords.*
##### Regex Filter
@@ -305,7 +319,7 @@
featureFilters: {
brkitr_rules: {
filterType: regex
- blacklist: [
+ excludelist: [
^.*_cj$
]
}
@@ -353,12 +367,14 @@
featureFilters:
curr_tree: {
filterType: locale
- whitelist: [
+ includelist: [
it
]
}
}
+*If using ICU 67 or earlier, see note above regarding allowed keywords.*
+
You can exclude an entire `_tree` category without affecting other categories.
For example, to exclude region display names:
@@ -446,7 +462,7 @@
{
categories: ["misc"]
files: {
- whitelist: ["supplementalData"]
+ includelist: ["supplementalData"]
}
rules: [
-/calendarData
@@ -454,6 +470,8 @@
}
]
+*If using ICU 67 or earlier, see note above regarding allowed keywords.*
+
#### Combining Multiple Resource Filter Specs
You can also list multiple resource filter objects in the *resourceFilters*
@@ -474,7 +492,7 @@
categories: ["unit_tree"]
files: {
filterType: locale
- whitelist: ["en_US"]
+ includelist: ["en_US"]
}
rules: [
+/*/length/mile
@@ -484,7 +502,7 @@
categories: ["unit_tree"]
files: {
filterType: locale
- whitelist: ["en_CA"]
+ includelist: ["en_CA"]
}
rules: [
+/*/length/kilometer
diff --git a/icu4c/source/python/icutools/databuilder/filtration.py b/icu4c/source/python/icutools/databuilder/filtration.py
index 554013a..5ad5f50 100644
--- a/icu4c/source/python/icutools/databuilder/filtration.py
+++ b/icu4c/source/python/icutools/databuilder/filtration.py
@@ -78,15 +78,22 @@
return False
-class WhitelistBlacklistFilter(Filter):
+class IncludeExcludeFilter(Filter):
def __init__(self, json_data):
if "whitelist" in json_data:
- self.is_whitelist = True
- self.whitelist = json_data["whitelist"]
+ self.is_includelist = True
+ self.includelist = json_data["whitelist"]
+ elif "includelist" in json_data:
+ self.is_includelist = True
+ self.includelist = json_data["includelist"]
+ elif "blacklist" in json_data:
+ self.is_includelist = False
+ self.excludelist = json_data["blacklist"]
+ elif "excludelist" in json_data:
+ self.is_includelist = False
+ self.excludelist = json_data["excludelist"]
else:
- assert "blacklist" in json_data, "Need either whitelist or blacklist: %s" % str(json_data)
- self.is_whitelist = False
- self.blacklist = json_data["blacklist"]
+ raise AssertionError("Need either includelist or excludelist: %s" % str(json_data))
def match(self, file):
file_stem = self._file_to_file_stem(file)
@@ -97,43 +104,43 @@
pass
-class FileStemFilter(WhitelistBlacklistFilter):
+class FileStemFilter(IncludeExcludeFilter):
def _should_include(self, file_stem):
- if self.is_whitelist:
- return file_stem in self.whitelist
+ if self.is_includelist:
+ return file_stem in self.includelist
else:
- return file_stem not in self.blacklist
+ return file_stem not in self.excludelist
-class LanguageFilter(WhitelistBlacklistFilter):
+class LanguageFilter(IncludeExcludeFilter):
def _should_include(self, file_stem):
language = file_stem.split("_")[0]
if language == "root":
# Always include root.txt
return True
- if self.is_whitelist:
- return language in self.whitelist
+ if self.is_includelist:
+ return language in self.includelist
else:
- return language not in self.blacklist
+ return language not in self.excludelist
-class RegexFilter(WhitelistBlacklistFilter):
+class RegexFilter(IncludeExcludeFilter):
def __init__(self, *args):
# TODO(ICU-20301): Change this to: super().__init__(*args)
super(RegexFilter, self).__init__(*args)
- if self.is_whitelist:
- self.whitelist = [re.compile(pat) for pat in self.whitelist]
+ if self.is_includelist:
+ self.includelist = [re.compile(pat) for pat in self.includelist]
else:
- self.blacklist = [re.compile(pat) for pat in self.blacklist]
+ self.excludelist = [re.compile(pat) for pat in self.excludelist]
def _should_include(self, file_stem):
- if self.is_whitelist:
- for pattern in self.whitelist:
+ if self.is_includelist:
+ for pattern in self.includelist:
if pattern.match(file_stem):
return True
return False
else:
- for pattern in self.blacklist:
+ for pattern in self.excludelist:
if pattern.match(file_stem):
return False
return True
@@ -159,7 +166,12 @@
class LocaleFilter(Filter):
def __init__(self, json_data, io):
- self.locales_requested = list(json_data["whitelist"])
+ if "whitelist" in json_data:
+ self.locales_requested = list(json_data["whitelist"])
+ elif "includelist" in json_data:
+ self.locales_requested = list(json_data["includelist"])
+ else:
+ raise AssertionError("You must have an includelist in a locale filter")
self.include_children = json_data.get("includeChildren", True)
self.include_scripts = json_data.get("includeScripts", False)
diff --git a/icu4c/source/python/icutools/databuilder/filtration_schema.json b/icu4c/source/python/icutools/databuilder/filtration_schema.json
index 2b7ff99..3aed41a 100644
--- a/icu4c/source/python/icutools/databuilder/filtration_schema.json
+++ b/icu4c/source/python/icutools/databuilder/filtration_schema.json
@@ -90,7 +90,7 @@
{
"properties": {
"filterType": {
- "$ref": "#/definitions/blacklistWhitelistFilterTypes"
+ "$ref": "#/definitions/includeExcludeFilterTypes"
},
"whitelist": { "$ref": "#/definitions/stringList" }
},
@@ -100,7 +100,7 @@
{
"properties": {
"filterType": {
- "$ref": "#/definitions/blacklistWhitelistFilterTypes"
+ "$ref": "#/definitions/includeExcludeFilterTypes"
},
"blacklist": { "$ref": "#/definitions/stringList" }
},
@@ -110,6 +110,26 @@
{
"properties": {
"filterType": {
+ "$ref": "#/definitions/includeExcludeFilterTypes"
+ },
+ "includelist": { "$ref": "#/definitions/stringList" }
+ },
+ "required": ["includelist"],
+ "additionalProperties": false
+ },
+ {
+ "properties": {
+ "filterType": {
+ "$ref": "#/definitions/includeExcludeFilterTypes"
+ },
+ "excludelist": { "$ref": "#/definitions/stringList" }
+ },
+ "required": ["excludelist"],
+ "additionalProperties": false
+ },
+ {
+ "properties": {
+ "filterType": {
"type": "string",
"enum": ["exclude"]
}
@@ -138,6 +158,23 @@
"properties": {
"filterType": {
"type": "string",
+ "enum": ["locale"]
+ },
+ "includeChildren": {
+ "type": "boolean"
+ },
+ "includeScripts": {
+ "type": "boolean"
+ },
+ "includelist": { "$ref": "#/definitions/stringList" }
+ },
+ "required": ["filterType", "includelist"],
+ "additionalProperties": false
+ },
+ {
+ "properties": {
+ "filterType": {
+ "type": "string",
"enum": ["union"]
},
"unionOf": {
@@ -150,7 +187,7 @@
}
]
},
- "blacklistWhitelistFilterTypes": {
+ "includeExcludeFilterTypes": {
"type": "string",
"enum": [
"language",