-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcwzcc.yml
137 lines (133 loc) · 5.25 KB
/
cwzcc.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Template metadata: a short machine identifier, a human-readable label, and a
# one-sentence description of the ontology defined in this file.
template_name: cwzcc
template_label: CWZCC Error Ontology
template_description: Spelling Errors Ontology for the Contemporary Written Zamboangueño Chabacano Corpus (CWZCC).
# ========================================================================================
#
# English version created by Marcelo Yuji Himoro: mhimoro1@alumno.uned.es
# The example is taken from App. B of Himoro and Pareja-Lora, 2020 (see citation). The full
# dataset is not publicly available due to copyright and privacy concerns.
#
# ========================================================================================
# Span granularity used when selecting text.
# NOTE(review): `char` presumably means character-level selection; confirm
# against the consuming tool's documentation.
tokenization: char

# Overrides for text shown in the annotation interface. `target_label` must be
# nested under `interface_text.typology`; at column 0 it would be parsed as an
# unrelated top-level key and `interface_text` would be null.
interface_text:
  typology:
    target_label: "Document Excerpt:"
# Edit categories of the ontology. Each list item is one top-level category
# (`name`, `label`, display settings) with an `annotation` list; each annotation
# question carries an `options` list, and an option may itself carry a nested
# `options` list to form a sub-tree.
#
# NOTE(review): nesting depth of the "unintentional" tree below is reconstructed
# from the position of the `options:` keys and the meaning of the labels;
# confirm against App. B of the cited paper.
edits:
  # --- Intentional deviations from the orthography -------------------------
  - name: intentional
    label: "Intentional Errors"
    enable_output: true
    color: blue
    icon: fa-magnifying-glass
    annotation:
      - name: intentional_error_type
        label: "Intentional Error Type"
        options:
          - name: abbreviations
            label: "Abbreviations"
          - name: eye_dialect
            label: "Eye Dialect"
          - name: inanities
            label: "Inanities"
          - name: repetitions
            label: "Repetitions"
          - name: homomorph_glyphs
            label: "Use of Homomorph Glyphs"
          - name: euphemisms
            label: "Euphemisms"

  # --- Unintentional errors -------------------------------------------------
  - name: unintentional
    label: "Unintentional Errors"
    enable_output: true
    color: orange
    icon: fa-magnifying-glass
    annotation:
      - name: error_type
        label: "Unintentional Error Type"
        options:
          - name: non_random_errors
            label: "Non-random Errors"
            options:
              - name: arbitrary_spelling
                label: "Arbitrary Spelling Errors"
                options:
                  - name: phonogramical
                    label: "Phonogramical Errors"
                    options:
                      - name: homophone_graphemes
                        label: "Use of Homophone Graphemes"
                      - name: cognate_interference
                        label: "Cross-Linguistic Cognate Interference"
                  - name: phonetic
                    label: "Phonetic Errors"
                    options:
                      - name: phoneme_grapheme
                        label: "Phoneme-Grapheme Mismatch"
                      - name: impossible_grapheme
                        label: "Use of Impossible Graphemes"
              - name: regular_error
                label: "Regular Spelling Errors"
                options:
                  - name: orthographic
                    label: "Orthographic Signs Errors"
                    options:
                      - name: auxiliary_mark
                        label: "Auxiliary Mark Errors"
                        options:
                          - name: apostrophe_error
                            label: "Apostrophe Errors"
                            options:
                              - name: apostrophe_omission
                                label: "Apostrophe Omission"
                              - name: apostrophe_misuse
                                label: "Misuse of the Apostrophe"
                          - name: diacritics
                            label: "Use of Diacritics"
                          - name: hyphen_error
                            label: "Hyphen Errors"
                            options:
                              - name: hyphen_omission
                                label: "Hyphen Omission"
                              - name: hyphen_misuse
                                label: "Misuse of the Hyphen"
                      - name: inverted_punctuation
                        label: "Use of Inverted Punctuation Marks"
                  - name: segmentation
                    label: "Segmentation Errors"
                    options:
                      - name: space_omission
                        label: "Space Omission"
                      - name: space_misuse
                        label: "Misuse of Spaces"
                  - name: letter_case
                    label: "Letter Case Errors"
                    options:
                      - name: capitalization_omission
                        label: "Lack of Capitalisation"
                      - name: capitalization_misuse
                        label: "Miscapitalisation"
          - name: random
            label: "Random Errors"
            options:
              - name: insertion
                label: "Insertion"
              - name: omission
                label: "Omission"
              - name: substitution
                label: "Substitution"
              - name: transposition
                label: "Transposition"
# Link to the paper that introduces the ontology.
paper_link: https://aclanthology.org/2020.lrec-1.327/

# BibTeX entry for the paper. This is a literal block scalar (`|`), so line
# breaks are preserved. The content must be indented deeper than the `citation`
# key; at column 0 the scalar would be empty and the `@` that starts the entry
# is a reserved YAML indicator.
citation: |
  @inproceedings{himoro-pareja-lora-2020-towards,
      title = "Towards a Spell Checker for {Z}amboanga {C}havacano Orthography",
      author = "Himoro, Marcelo Yuji and
        Pareja-Lora, Antonio",
      booktitle = "Proceedings of the Twelfth Language Resources and Evaluation Conference",
      month = may,
      year = "2020",
      address = "Marseille, France",
      publisher = "European Language Resources Association",
      url = "https://aclanthology.org/2020.lrec-1.327",
      pages = "2685--2697",
      language = "English",
      ISBN = "979-10-95546-34-4",
  }