-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathapertium-sah.sah.rlx
122 lines (93 loc) · 2.55 KB
/
apertium-sah.sah.rlx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Sentence delimiters: the tags listed here mark where a sentence (window) ends.
# In this grammar only the full-stop wordform "<.>" is declared as a delimiter.
DELIMITERS = "<.>" ;
# Sub-reading direction. LTR selects the alternate, left-to-right order, in
# which the main reading is the one on the left. This setting determines which
# sub-reading the SUB:1 rules further down in this file address.
SUBREADINGS = LTR ; # Alternate, left-to-right (main reading on the left)
# ============= #
# Tags and sets #
# ============= #
SETS

# Window boundary tags: >>> (beginning) and <<< (end).
LIST BOS = (>>>) ;
LIST EOS = (<<<) ;

# First-level/Parts-of-speech tags
# ================================
LIST A = adj ;
LIST Adv = adv ;
LIST Pron = prn ;
LIST Pron-Pers = (prn pers) ;   # composite tag: both prn and pers on one reading
LIST N = n ;
LIST IJ = ij ;
LIST Prop = np ;
LIST V = v ;
LIST Vaux = vaux ;
LIST Cop = cop ;
LIST Det = det ;
LIST CC = cnjcoo ;
LIST CS = cnjsub ;
LIST Interj = ij ;              # same tag as IJ; both names are kept
LIST Pres = pres ;
LIST Num = num ;
LIST Post = post ;
LIST Postadv = postadv ;
LIST FinalClitic = mod_ass mod_emo mod qst emph ;
LIST Cm = cm ;
LIST Rquot = rquot ;
LIST Excl = "!" ;

# POS sub-categories
# ==================
LIST Interr = itg ;

# Morphosyntactic properties
# ==========================
# If you find yourself embracing the same tag in brackets over and over
# again, you may want to add it here.
LIST Nom = nom ;
LIST Gen = gen ;
LIST Dat = dat ;
LIST Acc = acc ;
LIST Abl = abl ;
LIST Loc = loc ;
LIST Attr = attr ;
LIST Subst = subst ;
# Prc holds only prc_perf and prc_impf; the broader Participle list below
# also includes prc_cond, prc_vol and prc_plan.
LIST Prc = prc_perf prc_impf ;

# Verb sets
# =========
LIST FiniteVerb = pres aor past ifi ifi_evid fut fut_plan imp opt pih ;
LIST V-P3 = (v p3) (vaux p3) (cop p3) ;
LIST Gerund = ger ger_ppot ger_past ger_perf ger_impf ger_abs ger_aor ;
SET V-NotGerund = V - Gerund ;  # V readings that carry no Gerund tag
LIST Participle = prc_perf prc_impf prc_cond prc_vol prc_plan ;
LIST VerbalAdverb = gna_perf gna_cond gna_until gna_after ;
LIST VerbalAdjective = gpr_past gpr_fut gpr_pot gpr_impf ;
LIST SubstPl = (subst pl) ;

# MARK: Cm plus the quoted tags for backslash, semicolon and opening
# parenthesis. It is used as a boundary context by the Cop rules in this file.
SET MARK = Cm | ("\\") | ("\;") | ("\(") ;
SECTION
# Unlikely to be an auxiliary
# if it can be a lexical verb
# and the thing before it can't be an infinitive.
#
# Removes the Vaux reading when the same cohort also has a V reading and no
# reading of the preceding cohort matches Prc.
# NOTE(review): the wording above says "infinitive", but the context test
# checks the Prc set (prc_perf, prc_impf) — confirm which is intended.
REMOVE Vaux IF
(0 V)
(NOT -1 Prc)
;
# Probably not a lexical verb
# if it can be an auxiliary
# and the thing before it is an infinitive.
#
# Counterpart of the REMOVE Vaux rule: removes the V reading when the same
# cohort also has a Vaux reading and the preceding cohort has a Prc reading.
# NOTE(review): the wording above says "infinitive", but the context test
# checks the Prc set (prc_perf, prc_impf) — confirm which is intended.
REMOVE V IF
(0 Vaux)
(-1 Prc)
;
# Prefer the participle reading in front of an auxiliary.
# Fires only when every reading of the current cohort is a Participle or a
# VerbalAdverb (careful test 0C) and the next cohort has a Vaux reading;
# the Participle readings are then selected and the others discarded.
SELECT Participle IF
(0C Participle OR VerbalAdverb)
(1 Vaux)
;
# Probably not a copula if not at the end of the sentence.
# SUB:1 makes the rule address the Cop tag at sub-reading level 1 rather than
# on the main reading. The rule fires when the next cohort is neither EOS
# (tagged <<<) nor a member of the MARK set.
REMOVE SUB:1 Cop IF
(NOT 1 EOS OR MARK)
;
# Also not a copula when the cohort directly follows the start of the window
# (BOS, tagged >>>) or a MARK, and the next cohort is not EOS. Unlike the
# previous Cop rule, this one also fires when the next cohort is a MARK.
REMOVE SUB:1 Cop IF
(-1 BOS OR MARK) ## Headings or enumerations
(NOT 1 EOS)
;
# Resources:
# http://visl.sdu.dk/cg3.html
# http://groups.google.com/group/constraint-grammar
# http://kevindonnelly.org.uk/2010/05/constraint-grammar-tutorial/
# https://wiki.apertium.org/wiki/Constraint_Grammar
# https://wiki.apertium.org/wiki/Apertium_and_Constraint_Grammar