| #! /usr/bin/python |
| |
| from __future__ import absolute_import |
| from __future__ import print_function |
| try: |
| from urllib2 import urlopen |
| from urllib2 import URLError |
| except ImportError: |
| from urllib.request import urlopen |
| from urllib.error import URLError |
| |
| import sys |
| import os |
| from lxml import html |
| from six.moves import range |
| |
# Location of the cached copy of the page, stored next to this script.
datafile = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'list-unicodeset.html')
try:
    # Download the current listing and refresh the cached copy with it.
    fp = urlopen('http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3AGC%3DZs%3A][%3ADI%3A]&abb=on&ucd=on&esc=on&g')
    try:
        data = fp.read()
    finally:
        # Release the connection even when the read fails part-way.
        fp.close()
    with open(datafile, 'wb') as fp:
        fp.write(data)
except URLError:
    # fall back reading the static data in repo
    try:
        # Read the cache as bytes: that is how it was written above, and it
        # keeps `data` the same type as on the network path.
        with open(datafile, 'rb') as fp:
            data = fp.read()
    except IOError:
        sys.stderr.write("Error: No static data to generate the blank data. please make sure the network connection is reachable to Unicode.org\n")
        sys.exit(1)
| |
# Pull the text nodes of the result paragraphs out of the page.  The second
# node is expected to hold the bracketed set expression; the first one is
# consulted further down for the expected number of code points.
dom = html.fromstring(data)
x = dom.xpath('/html/body/form/p/text()')
if len(x) < 2:
    # Without this check a changed page layout surfaces as a bare IndexError.
    sys.stderr.write("Error: Unexpected page layout; unable to find the UnicodeSet expression\n")
    sys.exit(1)
p = x[1]
# startswith/endswith also cope with an empty string, where p[0] would raise.
if p.startswith('[') and p.endswith(']'):
    # Drop every bracket so only the escaped code points and ranges remain.
    p = p.replace('[', '').replace(']', '')
else:
    sys.stderr.write("Error: The UnicodeSet expression is not enclosed in brackets\n")
    sys.exit(1)
# State shared by the character-by-character parser that follows.
#   fescape  - the previous character was a backslash
#   funicode - hex digits of a code point are being accumulated into v
#   frange   - a '-' was seen, so vbegin holds the start of a range
#   fprocess - a range was emitted and the state has to be reset
fescape = funicode = frange = fprocess = False
# v is the value being accumulated, vbegin/vend the bounds of the current
# range and n the number of characters consumed so far.
v = vbegin = vend = n = 0
# Collected [begin, end] pairs.
l = []
| |
def insert(db, begin, end):
    """Append the pair ``[begin, end]`` to *db* as a new two-element list.

    *db* is modified in place and nothing is returned.  The callers in this
    script treat the pair as the inclusive bounds of a code point range.
    """
    entry = [begin, end]
    db.append(entry)
| |
# Walk the escaped set expression one character at a time.  A code point is
# written as a backslash, 'u' or 'U', and hex digits; '-' joins two code
# points into a range; a backslash or a space ends the code point currently
# being accumulated.  Finished ranges are appended to `l` through insert().
for i in p:
    if i == '\\':
        # A new escape begins; if digits were being accumulated, that code
        # point is complete and gets emitted below.
        fprocess = funicode
        fescape = True
    elif i.lower() == 'u' and fescape:
        funicode = True
        fescape = False
    elif i in '0123456789abcdefABCDEF':
        if fescape:
            raise RuntimeError("Unexpected escape code")
        if not funicode:
            raise RuntimeError("Unable to parse Unicode")
        # Shift in the next hex digit.
        v = (v << 4) + int(i, 16)
    elif i == ' ':
        if fescape:
            # A backslash followed by a space stands for U+0020 itself.
            funicode = True
            fescape = False
            v = 0x20
        fprocess = funicode
        if not fprocess:
            # Nothing pending: a space still cancels a half-open range.
            frange = False
    elif i == '-':
        if fescape:
            raise RuntimeError("Unexpected escape code")
        # The value read so far becomes the start of a range.
        vbegin = v
        v = 0
        funicode = False
        frange = True
    else:
        raise RuntimeError("Unable to parse Unicode: %s" % i)

    if fprocess:
        # Emit either vbegin..v (after a '-') or the single code point v,
        # then return to the idle state.
        vend = v
        if not frange:
            vbegin = v
        insert(l, vbegin, vend)
        vbegin = 0
        vend = 0
        v = 0
        fprocess = False
        funicode = False
        frange = False
    n += 1

# The expression may end right after the last hex digit, leaving one code
# point or range that no separator has flushed yet.
if funicode:
    vend = v
    if not frange:
        vbegin = v
    insert(l, vbegin, vend)
| |
# Number of code points covered by the parsed ranges; both ends of every
# [begin, end] pair are counted.
ncode = sum(span[1] - span[0] + 1 for span in l)

# The leading token of the first text node, with thousands separators
# removed, is read as the expected number of code points.
a = int(x[0].split(' ')[0].replace(',', ''))
if ncode != a:
    sys.stderr.write("Unexpected the amount of code points: %d (expected %d)\n" % (ncode, a))
    sys.exit(1)

# exception; BRAILLE PATTERN BLANK
# Added after the check above, so ncode does not include it.
insert(l, 0x2800, 0x2800)
| |
# Copy standard input to standard output, with trailing whitespace removed
# from every line, until a line consisting of "@@@" is reached; that line is
# not copied.  iter() with the '' sentinel stops at end of input, so input
# without the marker no longer loops forever printing empty lines.
for s in iter(sys.stdin.readline, ''):
    s = s.rstrip()
    if s == "@@@":
        break
    print(s)
| |
# Array header; the declared size is ncode + 1.
print("static FcChar32 _fcBlanks[%s] = {" % (ncode + 1))

# One formatted entry per code point, walking the ranges in order of their
# first code point.
entries = [
    " 0x%04x" % codepoint
    for first, last in sorted(l, key=lambda span: span[0])
    for codepoint in range(first, last + 1)
]
if entries:
    # Each entry is followed by a space; entries are separated by a comma
    # and a newline.
    print(" ,\n".join(entries), end=' ')

print("};")
print('''
static FcBlanks fcBlanks = {
%s,
-1,
_fcBlanks
};
''' % (ncode + 1))