-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathindex.ts
82 lines (73 loc) · 2.18 KB
/
index.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import bindings from 'bindings';
// Load the native addon registered under the name 'hfstol_addon' through the
// `bindings` helper. The class below notes that the actual lookup
// implementation lives in C++.
const addon = bindings('hfstol_addon');
/**
 * Shape assumed for the `Transducer` export of the native addon.
 *
 * This one interface carries both the construct signature and the instance
 * method, so a value of this type can appear in an `extends` clause and the
 * instances it yields expose `_lookup_symbols`.
 */
interface CppTransducerInterface {
// Construct signature; `fstFilename` is presumably the path of the FST file
// to load (TODO confirm against the C++ side).
new(fstFilename: string): CppTransducerInterface;
// Natively implemented lookup: callers in this file treat the result as one
// array of symbol strings per analysis of `text`.
_lookup_symbols(text: string): string[][]
}
// View the addon's `Transducer` export through CppTransducerInterface so it can
// be subclassed below. NOTE(review): this is a type assertion, so the compiler
// does not verify that the native export really has this shape.
const CppTransducer = addon.Transducer as CppTransducerInterface;
/**
 * JavaScript-facing wrapper around the native transducer. The lookup itself is
 * implemented in C++; this class adds argument checking and convenience
 * methods that post-process the raw symbol arrays.
 */
export class Transducer extends CppTransducer {
  /**
   * Apply FST to text, returning one array of symbols per analysis. A symbol
   * is either a single character or a multi-character tag.
   *
   * E.g. (illustrative), lookup_symbols("atim") ⇒
   *   [["a", "t", "i", "m", "+N", "+A", "+Sg"], ...]
   *
   * @param text - the wordform to analyse
   * @returns an array of analyses, each an array of symbol strings
   * @throws Error if not called with exactly one argument
   */
  lookup_symbols(text: string): string[][] {
    if (arguments.length !== 1) {
      throw new Error("Wrong number of arguments");
    }
    // Actual implementation is in C++
    return this._lookup_symbols(text);
  }

  /**
   * Apply FST to text, returning array of analyses strings. Each string is
   * the concatenation of the symbols of one analysis.
   *
   * E.g., lookup("atim") => ["atim+N+A+Sg", "atimêw+V+TA+Imp+Imm+2Sg+3SgO"]
   *
   * @param text - the wordform to analyse
   * @returns one concatenated string per analysis
   * @throws Error if not called with exactly one argument
   */
  lookup(text: string): string[] {
    if (arguments.length !== 1) {
      throw new Error("Wrong number of arguments");
    }
    return this.lookup_symbols(text).map((symbols) => symbols.join(""));
  }

  /**
   * Apply FST to text, returning array of (1) array of prefix tags
   * (2) concatenated lemma and (3) array of suffix tags.
   *
   * E.g., lookup_lemma_with_affixes("kî-atimik") ⇒
   *   [[["PV/ki"], "atimêw", ["+V", "+TA", "+Ind", "+4Sg/Pl", "+3SgO"]]]
   *
   * @param text - the wordform to analyse
   * @returns one `[prefixTags, lemma, suffixTags]` tuple per analysis
   * @throws Error if a character symbol appears after a suffix tag, i.e. the
   *   analysis is not of the form tags* characters* tags*
   */
  lookup_lemma_with_affixes(text: string): [string[], string, string[]][] {
    const results: [string[], string, string[]][] = [];

    for (const analysis of this.lookup_symbols(text)) {
      const prefixTags: string[] = [];
      const suffixTags: string[] = [];
      let lemma = "";
      let lemmaStarted = false;
      let lemmaFinished = false;

      for (const symbol of analysis) {
        // Count code points rather than UTF-16 code units, so that a single
        // character outside the Basic Multilingual Plane (whose `.length`
        // is 2) is still recognised as a character and not taken for a tag.
        const isCharacter = Array.from(symbol).length === 1;

        if (isCharacter) {
          if (lemmaFinished) {
            // A character after a suffix tag does not fit the expected shape.
            // Joining with "," matches the array's default string conversion.
            throw new Error(
              `Unable to parse ${analysis.join(",")} into lemma and affixes`
            );
          }
          lemmaStarted = true;
          lemma += symbol;
        } else if (!lemmaStarted) {
          // Tag seen before any lemma character: a prefix tag.
          prefixTags.push(symbol);
        } else {
          // Tag seen after the lemma began: the lemma is now complete.
          lemmaFinished = true;
          suffixTags.push(symbol);
        }
      }

      results.push([prefixTags, lemma, suffixTags]);
    }

    return results;
  }
}