-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathttf2utf.py
56 lines (42 loc) · 1.63 KB
/
ttf2utf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import os
import re
import yaml
# File name, inside the rules directory, of the YAML file holding the shared
# pre-/post-rules; load_rules() skips it when collecting per-font rule files.
DEFAULTS_FILE = 'defaults.yaml'
def _load_defaults(yaml_path):
    """Load the shared defaults file and compile its regex rules.

    Args:
        yaml_path: Directory that contains ``DEFAULTS_FILE``.

    Returns:
        The mapping parsed from the defaults file, with ``'pre-rules'`` and
        ``'post-rules'`` replaced by lists of ``(compiled_pattern,
        replacement)`` tuples. A section that is missing or empty in the
        file becomes an empty list.

    Raises:
        OSError: If the defaults file cannot be opened.
        re.error: If a rule pattern is not a valid regular expression.
    """
    path = os.path.join(yaml_path, DEFAULTS_FILE)
    with open(path, encoding='utf-8') as infile:
        # NOTE(review): FullLoader accepts more than plain data tags; if the
        # rules directory can ever hold untrusted files, switch to
        # yaml.safe_load after confirming the rule files parse with it.
        default = yaml.load(infile, Loader=yaml.FullLoader) or {}

    # convert_word() treats both sections as optional, so an absent or empty
    # section is normalised to an empty list instead of raising KeyError.
    for section in ('pre-rules', 'post-rules'):
        default[section] = [
            (re.compile(entry[0]), entry[1])
            for entry in default.get(section) or []
        ]
    return default
def load_rules(yaml_path):
    """Build the table of conversion rules found in a directory.

    Every regular file in ``yaml_path`` other than ``DEFAULTS_FILE`` is
    parsed as YAML, in sorted file-name order. Each parsed rule has its
    ``'post-rules'`` and ``'pre-rules'`` entries set to the compiled lists
    from the defaults file (the same list objects are shared by all rules).

    Args:
        yaml_path: Directory holding the defaults file and the rule files.

    Returns:
        Dict mapping each rule's lower-cased ``'name'`` (whitespace runs
        replaced by ``_``) to the parsed rule mapping. When two files yield
        the same key, the one processed later replaces the earlier one.
    """
    shared = _load_defaults(yaml_path)

    rules_by_key = {}
    for filename in sorted(os.listdir(yaml_path)):
        if filename == DEFAULTS_FILE:
            continue
        candidate = os.path.join(yaml_path, filename)
        if not os.path.isfile(candidate):
            # Sub-directories and other non-file entries are ignored.
            continue

        with open(candidate, encoding='utf-8') as handle:
            loaded = yaml.load(handle, Loader=yaml.FullLoader)

        loaded['post-rules'] = shared['post-rules']
        loaded['pre-rules'] = shared['pre-rules']

        normalised_name = loaded['name'].lower()
        rules_by_key[re.sub(r'\s+', '_', normalised_name)] = loaded

    return rules_by_key
def convert_word(word, rule, debug_rule_index=None):
    """Convert one word using a single rule set.

    The conversion runs in three stages: the ``'pre-rules'`` regex
    substitutions, a per-character lookup in ``rule['char-map']``
    (characters without an entry are kept as they are), and finally the
    ``'post-rules'`` regex substitutions.

    Args:
        word: Text to convert.
        rule: Mapping with a required ``'char-map'`` dict and optional
            ``'pre-rules'`` / ``'post-rules'`` sequences whose items hold a
            pattern at index 0 and its replacement at index 1.
        debug_rule_index: Zero-based index of the post-rule to trace. When
            given, the word before and after that rule is printed. ``None``
            (the default) disables tracing.

    Returns:
        The converted string.

    Raises:
        KeyError: If ``rule`` has no ``'char-map'`` entry.
    """
    utf_word = word
    for rulez in rule.get('pre-rules', []):
        utf_word = re.sub(rulez[0], rulez[1], utf_word)

    char_map = rule['char-map']
    utf_word = ''.join(char_map.get(c, c) for c in utf_word)

    for i, rulez in enumerate(rule.get('post-rules', [])):
        old_word = utf_word
        utf_word = re.sub(rulez[0], rulez[1], utf_word)
        # Compare against None explicitly: index 0 is a valid rule to trace
        # and must not be treated as "debugging off".
        if debug_rule_index is not None and i == debug_rule_index:
            print(old_word, '->', utf_word)
    return utf_word