# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

# Python 2/3 Compatibility (ICU-20299)
# TODO(ICU-20301): Remove this.
from __future__ import print_function

from abc import abstractmethod
from collections import defaultdict
import re
import sys

from . import *
from . import utils
from .request_types import *

# Note: for this to be a proper abstract class, it should extend abc.ABC.
# There is no nice way to do this that works in both Python 2 and 3.
# TODO(ICU-20301): Make this inherit from abc.ABC.
class Filter(object):
    @staticmethod
    def create_from_json(json_data, io):
        assert io is not None
        if "filterType" in json_data:
            filter_type = json_data["filterType"]
        else:
            filter_type = "file-stem"

        if filter_type == "file-stem":
            return FileStemFilter(json_data)
        elif filter_type == "language":
            return LanguageFilter(json_data)
        elif filter_type == "regex":
            return RegexFilter(json_data)
        elif filter_type == "exclude":
            return ExclusionFilter()
        elif filter_type == "union":
            return UnionFilter(json_data, io)
        elif filter_type == "locale":
            return LocaleFilter(json_data, io)
        else:
            print("Error: Unknown filterType option: %s" % filter_type, file=sys.stderr)
            return None
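    # Illustrative filter configs accepted by create_from_json, reconstructed
    # from the branches above (the values are examples, not taken from a real
    # filters.json):
    #   {"filterType": "language", "includelist": ["en", "de"]}
    #   {"filterType": "regex", "excludelist": ["^zh_Hant.*"]}
    #   {"filterType": "union", "unionOf": [ ...nested filter objects... ]}
    # When "filterType" is omitted, "file-stem" is assumed.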
    def filter(self, request):
        if not request.apply_file_filter(self):
            return []
        for file in request.all_input_files():
            assert self.match(file)
        return [request]

    @staticmethod
    def _file_to_file_stem(file):
        start = file.filename.rfind("/")
        limit = file.filename.rfind(".")
        return file.filename[start+1:limit]
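    # For example, a file named "locales/en_GB.txt" yields the stem "en_GB":
    # everything between the last "/" and the last "." of the filename.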
    @staticmethod
    def _file_to_subdir(file):
        limit = file.filename.rfind("/")
        if limit == -1:
            return None
        return file.filename[:limit]

    @abstractmethod
    def match(self, file):
        pass


class InclusionFilter(Filter):
    def match(self, file):
        return True


class ExclusionFilter(Filter):
    def match(self, file):
        return False


class IncludeExcludeFilter(Filter):
    def __init__(self, json_data):
        if "whitelist" in json_data:
            self.is_includelist = True
            self.includelist = json_data["whitelist"]
        elif "includelist" in json_data:
            self.is_includelist = True
            self.includelist = json_data["includelist"]
        elif "blacklist" in json_data:
            self.is_includelist = False
            self.excludelist = json_data["blacklist"]
        elif "excludelist" in json_data:
            self.is_includelist = False
            self.excludelist = json_data["excludelist"]
        else:
            raise AssertionError("Need either includelist or excludelist: %s" % str(json_data))

    def match(self, file):
        file_stem = self._file_to_file_stem(file)
        return self._should_include(file_stem)

    @abstractmethod
    def _should_include(self, file_stem):
        pass


class FileStemFilter(IncludeExcludeFilter):
    def _should_include(self, file_stem):
        if self.is_includelist:
            return file_stem in self.includelist
        else:
            return file_stem not in self.excludelist


class LanguageFilter(IncludeExcludeFilter):
    def _should_include(self, file_stem):
        language = file_stem.split("_")[0]
        if language == "root":
            # Always include root.txt
            return True
        if self.is_includelist:
            return language in self.includelist
        else:
            return language not in self.excludelist


class RegexFilter(IncludeExcludeFilter):
    def __init__(self, *args):
        # TODO(ICU-20301): Change this to: super().__init__(*args)
        super(RegexFilter, self).__init__(*args)
        if self.is_includelist:
            self.includelist = [re.compile(pat) for pat in self.includelist]
        else:
            self.excludelist = [re.compile(pat) for pat in self.excludelist]

    def _should_include(self, file_stem):
        if self.is_includelist:
            for pattern in self.includelist:
                if pattern.match(file_stem):
                    return True
            return False
        else:
            for pattern in self.excludelist:
                if pattern.match(file_stem):
                    return False
            return True
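# A hypothetical RegexFilter configuration, shown only to illustrate how the
# compiled patterns above are applied:
#   {"filterType": "regex", "includelist": ["^ja", "^ko"]}
# keeps only stems starting with "ja" or "ko"; re.match anchors each pattern
# at the start of the file stem.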
class UnionFilter(Filter):
    def __init__(self, json_data, io):
        # Collect the sub-filters.
        self.sub_filters = []
        for filter_json in json_data["unionOf"]:
            self.sub_filters.append(Filter.create_from_json(filter_json, io))

    def match(self, file):
        """Match iff any of the sub-filters match."""
        for filter in self.sub_filters:
            if filter.match(file):
                return True
        return False


LANGUAGE_SCRIPT_REGEX = re.compile(r"^([a-z]{2,3})_[A-Z][a-z]{3}$")
LANGUAGE_ONLY_REGEX = re.compile(r"^[a-z]{2,3}$")


class LocaleFilter(Filter):
    def __init__(self, json_data, io):
        if "whitelist" in json_data:
            self.locales_requested = list(json_data["whitelist"])
        elif "includelist" in json_data:
            self.locales_requested = list(json_data["includelist"])
        else:
            raise AssertionError("You must have an includelist in a locale filter")
        self.include_children = json_data.get("includeChildren", True)
        self.include_scripts = json_data.get("includeScripts", False)

        # Load the dependency graph from disk
        self.dependency_data_by_tree = {
            tree: io.read_locale_deps(tree)
            for tree in utils.ALL_TREES
        }
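    # An illustrative "locale" filter block, using the field names read above
    # (the locale values are made up):
    #   {"filterType": "locale", "includeScripts": true,
    #    "includeChildren": false, "includelist": ["sr_Latn", "de_CH"]}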
    def match(self, file):
        tree = self._file_to_subdir(file)
        assert tree is not None
        locale = self._file_to_file_stem(file)

        # A locale is *required* if it is *requested* or an ancestor of a
        # *requested* locale.
        if locale in self._locales_required(tree):
            return True

        # Resolve include_scripts and include_children.
        return self._match_recursive(locale, tree)

    def _match_recursive(self, locale, tree):
        # Base case: return True if we reached a *requested* locale,
        # or False if we ascend out of the locale tree.
        if locale is None:
            return False
        if locale in self.locales_requested:
            return True

        # Check for alternative scripts.
        # This causes sr_Latn to check sr instead of going directly to root.
        if self.include_scripts:
            match = LANGUAGE_SCRIPT_REGEX.match(locale)
            if match and self._match_recursive(match.group(1), tree):
                return True

        # Check if we are a descendant of a *requested* locale.
        if self.include_children:
            parent = self._get_parent_locale(locale, tree)
            if self._match_recursive(parent, tree):
                return True

        # No matches.
        return False

    def _get_parent_locale(self, locale, tree):
        """Gets the parent locale in the given tree, according to dependency data."""
        dependency_data = self.dependency_data_by_tree[tree]
        if "parents" in dependency_data and locale in dependency_data["parents"]:
            return dependency_data["parents"][locale]
        if "aliases" in dependency_data and locale in dependency_data["aliases"]:
            return dependency_data["aliases"][locale]
        if LANGUAGE_ONLY_REGEX.match(locale):
            return "root"
        i = locale.rfind("_")
        if i < 0:
            assert locale == "root", "Invalid locale: %s/%s" % (tree, locale)
            return None
        return locale[:i]
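    # Example resolution chain, assuming no explicit "parents" or "aliases"
    # entry applies: "zh_Hans_CN" -> "zh_Hans" -> "zh" -> "root" -> None.
    # The fallback strips one "_" segment at a time, and a bare language code
    # maps directly to "root".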
    def _locales_required(self, tree):
        """Returns a generator of all required locales in the given tree."""
        for locale in self.locales_requested:
            while locale is not None:
                yield locale
                locale = self._get_parent_locale(locale, tree)


def apply_filters(requests, config, io):
    """Runs the filters and returns a new list of requests."""
    requests = _apply_file_filters(requests, config, io)
    requests = _apply_resource_filters(requests, config, io)
    return requests


def _apply_file_filters(old_requests, config, io):
    """Filters out entire files."""
    filters = _preprocess_file_filters(old_requests, config, io)
    new_requests = []
    for request in old_requests:
        category = request.category
        if category in filters:
            new_requests += filters[category].filter(request)
        else:
            new_requests.append(request)
    return new_requests


def _preprocess_file_filters(requests, config, io):
    all_categories = set(
        request.category
        for request in requests
    )
    all_categories.remove(None)
    all_categories = list(sorted(all_categories))
    json_data = config.filters_json_data
    filters = {}
    default_filter_json = "exclude" if config.strategy == "additive" else "include"
    for category in all_categories:
        filter_json = default_filter_json
        # Figure out the correct filter to create
        if "featureFilters" in json_data and category in json_data["featureFilters"]:
            filter_json = json_data["featureFilters"][category]
        if filter_json == "include" and "localeFilter" in json_data and category.endswith("_tree"):
            filter_json = json_data["localeFilter"]
        # Resolve the filter JSON into a filter object
        if filter_json == "exclude":
            filters[category] = ExclusionFilter()
        elif filter_json == "include":
            pass  # no-op
        else:
            filters[category] = Filter.create_from_json(filter_json, io)
    if "featureFilters" in json_data:
        for category in json_data["featureFilters"]:
            if category not in all_categories:
                print("Warning: category %s is not known" % category, file=sys.stderr)
    return filters
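# Sketch of the featureFilters resolution above, with example category names
# (not taken from a real filters.json): a literal "include" leaves a category
# unfiltered, "exclude" drops it, and anything else is passed to
# Filter.create_from_json, e.g.:
#   "featureFilters": {
#       "conversion_mappings": "exclude",
#       "misc": {"filterType": "file-stem", "includelist": ["currencyNumericCodes"]}
#   }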
class ResourceFilterInfo(object):
    def __init__(self, category, strategy):
        self.category = category
        self.strategy = strategy
        self.filter_tmp_dir = "filters/%s" % category
        self.input_files = None
        self.filter_files = None
        self.rules_by_file = None

    def apply_to_requests(self, all_requests):
        # Call this method only once per list of requests.
        assert self.input_files is None
        for request in all_requests:
            if request.category != self.category:
                continue
            if not isinstance(request, AbstractExecutionRequest):
                continue
            if request.tool != IcuTool("genrb"):
                continue
            if not request.input_files:
                continue
            self._set_files(request.input_files)
            request.dep_targets += [self.filter_files[:]]
            arg_str = "--filterDir {TMP_DIR}/%s" % self.filter_tmp_dir
            request.args = "%s %s" % (arg_str, request.args)

        # Make sure we found the target request
        if self.input_files is None:
            print("WARNING: Category not found: %s" % self.category, file=sys.stderr)
            self.input_files = []
            self.filter_files = []
            self.rules_by_file = []

    def _set_files(self, files):
        # Note: The input files to genrb for a certain category should always
        # be the same. For example, there are often two genrb calls: one for
        # --writePoolBundle, and the other for --usePoolBundle. They are both
        # expected to have the same list of input files.
        if self.input_files is not None:
            assert self.input_files == files
            return
        self.input_files = list(files)
        self.filter_files = [
            TmpFile("%s/%s" % (self.filter_tmp_dir, basename))
            for basename in (
                file.filename[file.filename.rfind("/")+1:]
                for file in files
            )
        ]
        if self.strategy == "additive":
            self.rules_by_file = [
                [r"-/", r"+/%%ALIAS", r"+/%%Parent"]
                for _ in range(len(files))
            ]
        else:
            self.rules_by_file = [
                [r"+/"]
                for _ in range(len(files))
            ]
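    # With the "additive" strategy each file starts from "remove everything,
    # then re-add %%ALIAS and %%Parent"; any other strategy starts from "keep
    # everything". Rules appended later via add_rules() refine these defaults
    # on a per-file basis.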
    def add_rules(self, file_filter, rules):
        for file, rule_list in zip(self.input_files, self.rules_by_file):
            if file_filter.match(file):
                rule_list += rules

    def make_requests(self):
        # Map from rule list to filter files with that rule list
        unique_rules = defaultdict(list)
        for filter_file, rules in zip(self.filter_files, self.rules_by_file):
            unique_rules[tuple(rules)].append(filter_file)

        new_requests = []
        i = 0
        for rules, filter_files in unique_rules.items():
            base_filter_file = filter_files[0]
            new_requests += [
                PrintFileRequest(
                    name = "%s_print_%d" % (self.category, i),
                    output_file = base_filter_file,
                    content = self._generate_resource_filter_txt(rules)
                )
            ]
            i += 1
            for filter_file in filter_files[1:]:
                new_requests += [
                    CopyRequest(
                        name = "%s_copy_%d" % (self.category, i),
                        input_file = base_filter_file,
                        output_file = filter_file
                    )
                ]
                i += 1
        return new_requests

    @staticmethod
    def _generate_resource_filter_txt(rules):
        result = "# Caution: This file is automatically generated\n\n"
        result += "\n".join(rules)
        return result
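# For example, ResourceFilterInfo._generate_resource_filter_txt(("-/", "+/units"))
# returns the following text (the rules here are made-up examples):
#   # Caution: This file is automatically generated
#
#   -/
#   +/units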
def _apply_resource_filters(all_requests, config, io):
    """Creates filters for looking within resource bundle files."""
    json_data = config.filters_json_data
    if "resourceFilters" not in json_data:
        return all_requests

    collected = {}
    for entry in json_data["resourceFilters"]:
        if "files" in entry:
            file_filter = Filter.create_from_json(entry["files"], io)
        else:
            file_filter = InclusionFilter()
        for category in entry["categories"]:
            # not defaultdict because we need to pass arguments to the constructor
            if category not in collected:
                filter_info = ResourceFilterInfo(category, config.strategy)
                filter_info.apply_to_requests(all_requests)
                collected[category] = filter_info
            else:
                filter_info = collected[category]
            filter_info.add_rules(file_filter, entry["rules"])

    # Add the filter generation requests to the beginning so that by default
    # they are made before genrb gets run (order is required by windirect)
    new_requests = []
    for filter_info in collected.values():
        new_requests += filter_info.make_requests()
    new_requests += all_requests
    return new_requests
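# An illustrative resourceFilters entry matching the keys read above
# ("files" is optional and defaults to an InclusionFilter; the category and
# rule values are examples only):
#   "resourceFilters": [{
#       "categories": ["misc"],
#       "files": {"filterType": "file-stem", "includelist": ["supplementalData"]},
#       "rules": ["-/", "+/territoryContainment"]
#   }]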