blob: cc23cde2d6f8d1dca2cc5e4b3c4dbafcf86ca141 [file] [log] [blame]
#! /usr/bin/python
from __future__ import absolute_import
from __future__ import print_function
try:
from urllib2 import urlopen
from urllib2 import URLError
except ImportError:
from urllib.request import urlopen
from urllib.error import URLError
import sys
import os
from lxml import html
from six.moves import range
datafile = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'list-unicodeset.html')
try:
fp = urlopen('http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[%3AGC%3DZs%3A][%3ADI%3A]&abb=on&ucd=on&esc=on&g')
data = fp.read()
fp.close()
fp = open(datafile, 'wb');
fp.write(data);
fp.close();
except URLError:
# fall back reading the static data in repo
try:
fp = open(datafile)
data = fp.read()
fp.close()
except IOError:
sys.stderr.write("Error: No static data to generate the blank data. please make sure the network connection is reachable to Unicode.org\n")
sys.exit(1)
dom = html.fromstring(data)
x = dom.xpath('/html/body/form/p/text()')
p = x[1]
if p[0] == '[' and p[-1] == ']':
p = p.replace('[', '').replace(']', '')
else:
sys.exit(1)
fescape = False
funicode = False
frange = False
fprocess = False
v = 0
vbegin = 0
vend = 0
n = 0
l = []
def insert(db, begin, end):
db.append([begin, end])
for i in p:
if i == '\\':
if n > 0:
if frange == True and funicode == True:
vend = v
insert(l, vbegin, vend)
fprocess = True
elif funicode == True:
vbegin = v
vend = v
insert(l, vbegin, vend)
fprocess = True
funicode = False
fescape = True
elif i.lower() == 'u' and fescape == True:
funicode = True
fescape = False
elif i >= '0' and i <= '9' or i.lower() >= 'a' and i.lower() <= 'f':
if fescape == True:
raise RuntimeError("Unexpected escape code")
if funicode == True:
v <<= 4
v += int(i, 16)
else:
raise RuntimeError("Unable to parse Unicode")
elif i == ' ':
if fescape == True:
funicode = True
fescape = False
v = 0x20
if frange == True and funicode == True:
vend = v
insert(l, vbegin, vend)
fprocess = True
elif funicode == True:
vbegin = v
vend = v
insert(l, vbegin, vend)
fprocess = True
funicode = False
frange = False
elif i == '-':
if fescape == True:
raise RuntimeError("Unexpected escape code")
vbegin = v
v = 0
funicode = False
frange = True
else:
raise RuntimeError("Unable to parse Unicode: %s" % i)
if fprocess == True:
vbegin = 0
vend = 0
v = 0
fprocess = False
funicode = False
frange = False
n += 1
if frange == True and funicode == True:
vend = v
insert(l, vbegin, vend)
elif funicode == True:
vbegin = vend = v
insert(l, vbegin, vend)
ncode = 0
for i in l:
ncode += (i[1] - i[0] + 1)
a = int(x[0].split(' ')[0].replace(',', ''))
if a != ncode:
sys.stderr.write("Unexpected the amount of code points: %d (expected %d)\n" % (ncode, a))
sys.exit(1)
# exception; BRAILLE PATTERN BLANK
insert(l, 0x2800, 0x2800)
while True:
s = sys.stdin.readline().rstrip()
if s == "@@@":
break
print(s)
print("static FcChar32 _fcBlanks[%s] = {" % (ncode + 1))
k = 0
for i in sorted(l, key=lambda a: a[0]):
for j in range(i[0], i[1] + 1):
if k != 0:
print(",")
print(" 0x%04x" % j, end=' ')
k += 1
print("};")
print('''
static FcBlanks fcBlanks = {
%s,
-1,
_fcBlanks
};
''' % (ncode + 1))