| #!/usr/bin/python -B |
| # -*- coding: utf-8 -*- |
| |
| # |
| # Copyright (C) 2017 and later: Unicode, Inc. and others. |
| # License & terms of use: http://www.unicode.org/copyright.html |
| # |
| # Copyright © 2016 and later: Unicode, Inc. and others. |
| # License & terms of use: http://www.unicode.org/copyright.html |
| # Copyright (C) 2011 IBM Corporation and Others. All Rights Reserved. |
| # |
| # Run this like so: |
| # cd /path/to/ICU |
| # python /path/to/thisscript/bomfix.py |
| # |
| # it will fixup any files that have a mime-type of "utf-8" but no BOM. |
| |
| import os |
| import codecs |
| import subprocess |
| |
# Single-argument parenthesised print behaves identically on Python 2 and 3.
print("Fixing bom in .\n")

# Passed through to my_propget(), which ignores it.
ctx = None

# Lazy walker over the current directory; consumed by the main loop below.
tree = os.walk(".")

# Running totals reported in the summary line at the end of the script.
nots = 0       # files not under svn
notutf8 = 0    # files whose svn:mime-type is not text/plain;charset=utf-8
noprops = 0    # files for which no svn:mime-type value was obtained
utf8 = 0       # files declared as UTF-8
fixed = 0      # files that had a BOM prepended
tfiles = 0     # every file visited
bom = codecs.BOM_UTF8
| |
| |
| # my own rewrite |
def my_propget(prop, path, ignored_rev, ignored_recurs, ignored_ctx):
    """Return the value of Subversion property *prop* on *path*.

    Runs ``svn pg <prop> <path>`` with stderr merged into stdout and returns
    the captured output as text, exactly as svn printed it (the caller
    expects the trailing newline to still be present).

    Returns None when svn fails with E200005 (path not under version
    control) or W200017 (property not found).

    The ``ignored_rev``, ``ignored_recurs`` and ``ignored_ctx`` parameters are
    accepted but never used.

    Raises:
        subprocess.CalledProcessError: for any other svn failure, after the
            captured output has been printed.
    """
    try:
        # universal_newlines=True yields text (not bytes) on Python 3 as
        # well, so the substring checks below work on both major versions.
        return subprocess.check_output(
            ["svn", "pg", prop, path],
            stderr=subprocess.STDOUT,
            universal_newlines=True)
    except subprocess.CalledProcessError as cpe:
        # Decode the error: these two codes are expected and harmless.
        if "E200005" in cpe.output or "W200017" in cpe.output:
            return None
        # Report against the path we were asked about, not a caller global.
        print("On " + path + ":\n" + cpe.output + "\n")
        print("This error wasn't recognized by bomfix, sorry.")
        # Bare raise keeps the original traceback intact.
        raise
| |
def _prepend_bom(target, marker):
    """Rewrite the file at *target* so that its content starts with *marker*.

    The original is renamed to ``target + ".tmp"``, streamed into a newly
    created *target* behind the marker, and the temporary file is removed
    only once the copy has finished. If the copy fails part-way, the
    original content is still available under the ``.tmp`` name.
    """
    backup = target + ".tmp"
    os.rename(target, backup)
    with open(backup, 'rb') as src, open(target, 'wb') as dst:
        dst.write(marker)
        # Copy in 2 KiB chunks until read() returns an empty string.
        for chunk in iter(lambda: src.read(2048), b""):
            dst.write(chunk)
    os.remove(backup)


for path, dirs, files in tree:
    # Prune Subversion metadata directories in place so os.walk never
    # descends into them, whatever path separator the platform uses.
    dirs[:] = [d for d in dirs if not d.startswith(".svn")]

    for name in files:
        tfiles += 1

        # Use a relative path.
        fp = path + "/" + name

        props = my_propget("svn:mime-type", fp, None, 0, ctx)
        if not props:
            noprops += 1
            continue

        if fp == "./LICENSE":
            print("Skipping: %s" % fp)
            continue

        # The value comes straight from the svn process and ends with a line
        # terminator; strip it rather than depending on it being "\n".
        if props.strip() != "text/plain;charset=utf-8":
            notutf8 += 1
            continue

        # fp is declared as UTF-8.
        utf8 += 1

        # Read the first three bytes and close the handle straight away, so
        # nothing is left open on the early-continue paths or across the
        # rename performed by _prepend_bom().
        with open(fp, 'rb') as f:
            head = f.read(3)

        if not head:
            print(fp + ": could not read 3 bytes")
            continue
        if head == bom:
            # Already starts with a BOM; nothing to do.
            continue

        _prepend_bom(fp, bom)
        fixed += 1

        # NOTE(review): assumed to report each file that was just fixed;
        # confirm against the intended indentation of the original.
        print(fp)


# nots is not incremented anywhere in this loop: my_propget() returns None for
# unversioned files as well as for missing properties, so both are counted
# under noprops.
print("%d files, %d not under svn, %d with no props, %d not utf8: %d utf8, %d fixed\n" % (tfiles,nots,noprops,notutf8,utf8,fixed))