summaryrefslogtreecommitdiff
path: root/poppler/gen-unicode-tables.py
blob: 282f6e525180bd9fcd4a2737c93d6acb3580ec5d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# Generator for the C tables of Unicode compatibility decompositions (NFKD).
#
# Run as a script, it writes to standard output:
#   * the `decomposition` struct typedef,
#   * DECOMP_TABLE_LENGTH,
#   * decomp_table[]     - one { character, length, offset } per code point
#                          whose NFKD form differs from the code point itself,
#   * decomp_expansion[] - the decomposed sequences, stored once each and
#                          addressed by `offset`.
#
# The data comes from the `unicodedata` module of the interpreter running the
# script, so the emitted tables follow that interpreter's Unicode database.
# The script runs on Python 2 and Python 3.
# NOTE(review): a "narrow" Python 2 build rejects unichr() above 0xFFFF, so
# use Python 3 or a wide Python 2 build.
import sys
import unicodedata

# Last code point scanned (inclusive).
UNICODE_LAST_CHAR_PART1 = 0x2FAFF
# First precomposed Hangul syllable and the number of syllables in the block
# (19 * 21 * 28 are the standard leading/vowel/trailing jamo counts).
HANGUL_S_BASE = 0xAC00
HANGUL_S_COUNT = 19 * 21 * 28

# Python 2 spells these `unichr`/`xrange`; Python 3 spells them `chr`/`range`.
try:
    _code_point_to_text = unichr
    _int_range = xrange
except NameError:
    _code_point_to_text = chr
    _int_range = range

# Emitted verbatim at the top of the generated C source.  The `Unicode` type
# is not defined here; the including C/C++ code is expected to declare it.
_HEADER = """// Generated by gen-unicode-tables.py

typedef struct {
  Unicode character;
  int length;
  int offset;
} decomposition;
"""


def build_decomposition_tables(last_char=UNICODE_LAST_CHAR_PART1):
    """Collect the NFKD decompositions of code points 0..last_char inclusive.

    Precomposed Hangul syllables are skipped (presumably because the consumer
    decomposes them arithmetically - confirm against the C code).

    Returns a pair ``(decomp_table, decomp_expansion)``:

    * ``decomp_table`` - list of ``(code_point, length, offset)`` in ascending
      code point order, one entry per code point that has a decomposition.
    * ``decomp_expansion`` - list of ``(sequence, offset)`` where ``sequence``
      is a tuple of code points and ``offset`` is its start index in the
      flattened expansion array.  Identical sequences are stored only once.
    """
    decomp_table = []
    decomp_expansion = []
    offset_by_sequence = {}
    next_offset = 0
    hangul_end = HANGUL_S_BASE + HANGUL_S_COUNT

    # +1: the constant names the last character, so the scan is inclusive.
    for code_point in _int_range(last_char + 1):
        if HANGUL_S_BASE <= code_point < hangul_end:
            continue
        text = _code_point_to_text(code_point)
        sequence = tuple(map(ord, unicodedata.normalize("NFKD", text)))
        if sequence == (code_point,):
            # No decomposition: the character normalizes to itself.
            continue

        offset = offset_by_sequence.get(sequence)
        if offset is None:
            # First occurrence: append the sequence to the expansion array.
            offset = next_offset
            offset_by_sequence[sequence] = offset
            decomp_expansion.append((sequence, offset))
            next_offset += len(sequence)
        decomp_table.append((code_point, len(sequence), offset))

    return decomp_table, decomp_expansion


def format_tables(decomp_table, decomp_expansion):
    """Return the complete generated C source text for the given tables.

    The layout (including blank lines and trailing spaces) matches what the
    original sequence of ``print`` statements produced.
    """
    table_rows = ", \n".join(
        "  { 0x%x, %d, %d }" % (character, length, offset)
        for character, length, offset in decomp_table)
    expansion_rows = ", \n".join(
        "  %s /* offset %d */ " % (", ".join("0x%x" % u for u in norm), index)
        for norm, index in decomp_expansion)

    # Each chunk is followed by the newline that `print` used to append.
    chunks = [
        _HEADER,
        "#define DECOMP_TABLE_LENGTH %d\n" % len(decomp_table),
        "static const decomposition decomp_table[] = {\n%s\n};\n" % table_rows,
        "static const Unicode decomp_expansion[] = {\n%s\n};\n"
        % expansion_rows,
    ]
    return "".join(chunk + "\n" for chunk in chunks)


def main():
    """Generate the tables for the full range and write them to stdout."""
    decomp_table, decomp_expansion = build_decomposition_tables()
    sys.stdout.write(format_tables(decomp_table, decomp_expansion))


if __name__ == "__main__":
    main()