| #! /usr/bin/python |
| |
| # Copyright (C) 2016 and later: Unicode, Inc. and others. |
| # License & terms of use: http://www.unicode.org/copyright.html |
| |
| # Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others. |
| # All rights reserved. |
| |
| # |
| # Script to check and fix svn property settings for ICU source files. |
| # Also check for the correct line endings on files with svn:eol-style = native |
| # |
| # THIS SCRIPT DOES NOT WORK ON WINDOWS |
| # It only works correctly on platforms where the native line ending is a plain \n |
| # |
| # usage: |
| # icu-svnprops-check.py [options] |
| # |
| # options: |
| # -f | --fix Fix any problems that are found |
| # -h | --help Print a usage line and exit. |
| # |
| # The tool operates recursively on the directory from which it is run. |
| # Only files from the svn repository are checked. |
| # No changes are made to the repository; only the working copy will be altered. |
| |
| import sys |
| import os |
| import os.path |
| import re |
| import getopt |
| |
| |
| # file_types: The parsed form of the svn auto-props specification. |
| # A list of file types - .cc, .cpp, .txt, etc. |
| # each element is a [type, proplist] |
| # "type" is a regular expression string that will match a file name |
| # prop list is another list, one element per property. |
| # Each property item is a two element list, [prop name, prop value] |
| file_types = list() |
| |
| def parse_auto_props(): |
| aprops = svn_auto_props.splitlines() |
| for propline in aprops: |
| if re.match("\s*(#.*)?$", propline): # Match comment and blank lines |
| continue |
| if re.match("\s*\[auto-props\]", propline): # Match the [auto-props] line. |
| continue |
| if not re.match("\s*[^\s]+\s*=", propline): # minimal syntax check for <file-type> = |
| print "Bad line from autoprops definitions: " + propline |
| continue |
| file_type, string_proplist = propline.split("=", 1) |
| |
| #transform the file type expression from autoprops into a normal regular expression. |
| # e.g. "*.cpp" ==> ".*\.cpp$" |
| file_type = file_type.strip() |
| file_type = file_type.replace(".", "\.") |
| file_type = file_type.replace("*", ".*") |
| file_type = file_type + "$" |
| |
| # example string_proplist at this point: " svn:eol-style=native;svn:executable" |
| # split on ';' into a list of properties. The negative lookahead and lookbehind |
| # in the split regexp are to prevent matching on ';;', which is an escaped ';' |
| # within a property value. |
| string_proplist = re.split("(?<!;);(?!;)", string_proplist) |
| proplist = list() |
| for prop in string_proplist: |
| if prop.find("=") >= 0: |
| prop_name, prop_val = prop.split("=", 1) |
| else: |
| # properties with no explicit value, e.g. svn:executable |
| prop_name, prop_val = prop, "" |
| prop_name = prop_name.strip() |
| prop_val = prop_val.strip() |
| # unescape any ";;" in a property value, e.g. the mime-type from |
| # *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8 |
| prop_val = prop_val.replace(";;", ";"); |
| # If the prop value "is quoted", remove the quotes. |
| # See svn:keywords for an example of a quoted prop value. |
| match = re.match('^"(.+)"$', prop_val) |
| if match: |
| prop_val = match.group(1) |
| proplist.append((prop_name, prop_val)) |
| |
| file_types.append((file_type, proplist)) |
| # print file_types |
| |
| |
| def runCommand(cmd): |
| output_file = os.popen(cmd); |
| output_text = output_file.read(); |
| exit_status = output_file.close(); |
| if exit_status: |
| print >>sys.stderr, '"', cmd, '" failed. Exiting.' |
| sys.exit(exit_status) |
| return output_text |
| |
| svn_auto_props = runCommand("svn propget svn:auto-props http://source.icu-project.org/repos/icu") |
| |
| def usage(): |
| print "usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]" |
| |
| |
| # |
| # UTF-8 file check. For text files with svn:mime-type=text/anything, check the specified charset |
| # file_name: name of a text file. |
| # base_mime_type: svn:mime-type property from the auto-props settings for this file type. |
| # actual_mime_type: existing svn:mime-type property value for the file. |
| # return: The correct svn:mime-type property value, |
| # either the original, if it looks OK, otherwise the value from auto-props |
| # |
| def check_utf8(file_name, base_mime_type, actual_mime_type): |
| |
| f = open(file_name, 'r') |
| bytes = f.read() |
| f.close() |
| file_is_utf8 = True |
| try: |
| bytes.decode("UTF-8") |
| except UnicodeDecodeError: |
| file_is_utf8 = False |
| |
| if not file_is_utf8 and actual_mime_type.find("utf-8") >= 0: |
| print "Error: %s is not valid utf-8, but has a utf-8 mime type." % file_name |
| return actual_mime_type |
| |
| if file_is_utf8 and actual_mime_type.find("charset") >=0 and actual_mime_type.find("utf-8") < 0: |
| print "Warning: %s is valid utf-8, but has a mime-type of %s." % (file_name, actual_mime_type) |
| |
| if ord(bytes[0]) == 0xef: |
| if not file_name.endswith(".txt"): |
| print "Warning: file %s contains a UTF-8 BOM: " % file_name |
| |
| # If the file already has a charset in its mime-type, don't make any change. |
| |
| if actual_mime_type.find("charset=") >= 0: |
| return actual_mime_type; |
| |
| return base_mime_type |
| |
| |
| def main(argv): |
| fix_problems = False; |
| try: |
| opts, args = getopt.getopt(argv, "fh", ("fix", "help")) |
| except getopt.GetoptError: |
| print "unrecognized option: " + argv[0] |
| usage() |
| sys.exit(2) |
| for opt, arg in opts: |
| if opt in ("-h", "--help"): |
| usage() |
| sys.exit() |
| if opt in ("-f", "--fix"): |
| fix_problems = True |
| if args: |
| print "unexpected command line argument" |
| usage() |
| sys.exit() |
| |
| parse_auto_props() |
| output = runCommand("svn ls -R "); |
| file_list = output.splitlines() |
| |
| for f in file_list: |
| if os.path.isdir(f): |
| # print "Skipping dir " + f |
| continue |
| if not os.path.isfile(f): |
| print "Repository file not in working copy: " + f |
| continue; |
| |
| for file_pattern, props in file_types: |
| if re.match(file_pattern, f): |
| # print "doing " + f |
| for propname, propval in props: |
| actual_propval = runCommand("svn propget --strict " + propname + " " + f) |
| #print propname + ": " + actual_propval |
| if propname == "svn:mime-type" and propval.find("text/") == 0: |
| # check for UTF-8 text files, should have svn:mime-type=text/something; charset=utf8 |
| propval = check_utf8(f, propval, actual_propval) |
| if not (propval == actual_propval or (propval == "" and actual_propval == "*")): |
| print "svn propset %s '%s' %s" % (propname, propval, f) |
| if fix_problems: |
| os.system("svn propset %s '%s' %s" % (propname, propval, f)) |
| |
| |
| if __name__ == "__main__": |
| main(sys.argv[1:]) |