blob: e7a4c6380e6a226e3383851577a9f2e3afaa500d [file] [log] [blame]
# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Python 2/3 Compatibility (ICU-20299)
# TODO(ICU-20301): Remove this.
from __future__ import print_function
from abc import abstractmethod
from collections import defaultdict
import re
import sys
from . import *
from . import utils
from .request_types import *
# Note: for this to be a proper abstract class, it should extend abc.ABC.
# There is no nice way to do this that works in both Python 2 and 3.
# TODO(ICU-20301): Make this inherit from abc.ABC.
class Filter(object):
@staticmethod
def create_from_json(json_data):
if "filterType" in json_data:
filter_type = json_data["filterType"]
else:
filter_type = "file-stem"
if filter_type == "file-stem":
return FileStemFilter(json_data)
elif filter_type == "language":
return LanguageFilter(json_data)
elif filter_type == "regex":
return RegexFilter(json_data)
elif filter_type == "exclude":
return ExclusionFilter()
else:
print("Error: Unknown filterType option: %s" % filter_type, file=sys.stderr)
return None
def filter(self, request):
if not request.apply_file_filter(self):
return []
for file in request.all_input_files():
assert self.match(file)
return [request]
@abstractmethod
def match(self, file):
pass
class ExclusionFilter(Filter):
def match(self, file):
return False
class WhitelistBlacklistFilter(Filter):
def __init__(self, json_data):
if "whitelist" in json_data:
self.is_whitelist = True
self.whitelist = json_data["whitelist"]
elif "blacklist" in json_data:
self.is_whitelist = False
self.blacklist = json_data["blacklist"]
def match(self, file):
file_stem = self._file_to_file_stem(file)
return self._should_include(file_stem)
@classmethod
def _file_to_file_stem(cls, file):
start = file.filename.rfind("/")
limit = file.filename.rfind(".")
return file.filename[start+1:limit]
@abstractmethod
def _should_include(self, file_stem):
pass
class FileStemFilter(WhitelistBlacklistFilter):
def _should_include(self, file_stem):
if self.is_whitelist:
return file_stem in self.whitelist
else:
return file_stem not in self.blacklist
class LanguageFilter(WhitelistBlacklistFilter):
def _should_include(self, file_stem):
language = file_stem.split("_")[0]
if language == "root":
# Always include root.txt
return True
if self.is_whitelist:
return language in self.whitelist
else:
return language not in self.blacklist
class RegexFilter(WhitelistBlacklistFilter):
def __init__(self, *args):
# TODO(ICU-20301): Change this to: super().__init__(*args)
super(RegexFilter, self).__init__(*args)
if self.is_whitelist:
self.whitelist = [re.compile(pat) for pat in self.whitelist]
else:
self.blacklist = [re.compile(pat) for pat in self.blacklist]
def _should_include(self, file_stem):
if self.is_whitelist:
for pattern in self.whitelist:
if pattern.match(file_stem):
return True
return False
else:
for pattern in self.blacklist:
if pattern.match(file_stem):
return False
return True
def apply_filters(requests, config):
"""Runs the filters and returns a new list of requests."""
requests = _apply_file_filters(requests, config)
requests = _apply_resource_filters(requests, config)
return requests
def _apply_file_filters(old_requests, config):
"""Filters out entire files."""
filters = _preprocess_file_filters(old_requests, config)
new_requests = []
for request in old_requests:
category = request.category
if category in filters:
new_requests += filters[category].filter(request)
else:
new_requests.append(request)
return new_requests
def _preprocess_file_filters(requests, config):
all_categories = set(
request.category
for request in requests
)
all_categories.remove(None)
all_categories = list(sorted(all_categories))
json_data = config.filters_json_data
filters = {}
for category in all_categories:
if "featureFilters" in json_data and category in json_data["featureFilters"]:
filters[category] = Filter.create_from_json(
json_data["featureFilters"][category]
)
elif "localeFilter" in json_data and category[-5:] == "_tree":
filters[category] = Filter.create_from_json(
json_data["localeFilter"]
)
if "featureFilters" in json_data:
for category in json_data["featureFilters"]:
if category not in all_categories:
print("Warning: category %s is not known" % category, file=sys.stderr)
return filters
def _apply_resource_filters(old_requests, config):
"""Creates filters for looking within resource bundle files."""
return old_requests