Skip to content

Commit 730dffc

Browse files
authored
Merge pull request #6 from rlpowell/master
Added an original-camxes-style parser
2 parents 2c2d68c + 1a841ea commit 730dffc

File tree

7 files changed

+86430
-86332
lines changed

7 files changed

+86430
-86332
lines changed

README.txt

+11
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,17 @@ Partial parsing:
6363
>>> node.end < len("klama ku ku")
6464
True
6565

66+
Original-camxes-style parsing:
67+
68+
>>> import camxes_py
69+
>>> from camxes_py.transformers import minimal
70+
>>> minimal_transformer = minimal.Transformer()
71+
>>> text = camxes_py.match("mi la cmen broda fu'ivla li 1 la'o gy english words gy", None, None, minimal_transformer)
72+
>>> text
73+
['sentence', [['terms', [['KOhA', 'mi'], ['sumti_6', [['LA', 'la'], ['CMEVLA', 'cmen']]]]], ['bridi_tail_3', [['selbri_3', [['gismu', 'broda'], ['lujvo', "fu'ivla"]]], ['nonabs_terms', [['li_clause', [['LI', 'li'], ['PA', '1']]], ['ZOI_pre', [['ZOI', "la'o"], ['BY', 'gy'], [['zoi_word', 'english '], ['zoi_word', 'words ']], ['BY', 'gy']]]]]]]]]
74+
>>>
75+
76+
6677
TESTING
6778
=======
6879

TODO

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
- merge any pull requests at
2+
https://github.com/teleological/camxes-py/pulls to this repo
3+
- add a test case for the minimal transformer

camxes.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010

1111
import camxes_py.parsimonious_ext # expression_nodes
1212

13-
__version__ = "v0.9.0"
13+
__version__ = "v0.10.0"
1414

1515
PARSERS = [ 'camxes-ilmen' ]
16-
TRANSFORMERS = [ 'camxes-json', 'camxes-morphology', 'vlatai', 'node-coverage', 'debug', 'raw' ]
16+
TRANSFORMERS = [ 'camxes-json', 'camxes-morphology', 'minimal', 'vlatai', 'node-coverage', 'debug', 'raw' ]
1717
SERIALIZERS = [ 'json', 'json-pretty', 'json-compact', 'xml' ]
1818

1919
IMPLEMENTATION_RECURSION_LIMIT = {
@@ -102,6 +102,9 @@ def build_transformer(transformer_option, parser):
102102
elif transformer_option == 'vlatai':
103103
from camxes_py.transformers import vlatai
104104
return vlatai.Transformer()
105+
elif transformer_option == 'minimal':
106+
from camxes_py.transformers import minimal
107+
return minimal.Transformer()
105108
elif transformer_option == 'node-coverage':
106109
from camxes_py.transformers import node_coverage
107110
return node_coverage.Transformer(parser)

camxes_py/transformers/minimal.py

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
2+
# pylint: disable=I0011, C0111, C0326, no-self-use, unused-argument, invalid-name
3+
4+
import re
5+
6+
from parsimonious.nodes import NodeVisitor
7+
8+
def is_selmaho_expression(name):
    """Tell whether *name* is shaped like a selma'o rule name (e.g. ``KOhA``).

    The accepted shape is: one uppercase letter from a fixed set, optionally
    followed by one of ``AEIOUY``, optionally followed by either ``I``/``U``
    or an ``h`` plus one of ``AEIOU``.

    Returns the ``re`` match object when *name* fits that shape and ``None``
    otherwise, so the result is intended for use in a boolean context.
    """
    selmaho_shape = r"^[ABCDFGIJKLMNPRSTUVXYZ]([AEIOUY]([IU]|h[AEIOU])?)?$"
    return re.match(selmaho_shape, name)
10+
11+
class Transformer(object):
    """Transformer that reduces a parse tree to minimal nested lists."""

    def transform(self, parsed):
        """Walk *parsed* with a fresh ``Visitor`` and return what it produces."""
        visitor = Visitor()
        return visitor.visit(parsed)

    def default_serializer(self):
        """Return a one-argument callable that serializes via ``as_json()``.

        NOTE(review): the visitor in this module only produces lists and
        ``None``, neither of which has ``as_json`` -- confirm how callers
        actually use this hook.
        """
        def serialize(value):
            return value.as_json()

        return serialize
18+
19+
class Visitor(NodeVisitor):
    """Collapse a parse tree into nested ``[name, content]`` lists.

    Word-level rules are turned into ``[label, text]`` leaves, whitespace
    and EOF are discarded, and every other node is handled by
    ``generic_visit``, which prunes empty results and folds away layers
    that have only a single surviving child.
    """

    def _tagged_text(self, label, node):
        """Build a ``[label, matched text]`` leaf for *node*."""
        return [label, node.text]

    def visit_space_char(self, node, visited_children):
        """Whitespace characters contribute nothing to the output."""
        return None

    def visit_EOF(self, node, visited_children):
        """The end-of-input marker contributes nothing to the output."""
        return None

    def visit_CMEVLA(self, node, visited_children):
        """Emit the matched text as a ``CMEVLA`` leaf."""
        return self._tagged_text('CMEVLA', node)

    def visit_zoi_word(self, node, visited_children):
        """Emit the matched text as a ``zoi_word`` leaf."""
        return self._tagged_text('zoi_word', node)

    def visit_gismu_2(self, node, visited_children):
        """Emit the matched text as a ``gismu`` leaf (note: not ``gismu_2``)."""
        return self._tagged_text('gismu', node)

    def visit_lujvo(self, node, visited_children):
        """Emit the matched text as a ``lujvo`` leaf."""
        return self._tagged_text('lujvo', node)

    def visit_fuhivla(self, node, visited_children):
        """Emit the matched text as a ``fu'ivla`` leaf."""
        return self._tagged_text("fu'ivla", node)

    def generic_visit(self, node, visited_children):
        """Handle every node that has no dedicated ``visit_*`` method.

        Returns ``None`` for nodes that should vanish, a ``[name, text]``
        leaf for childless nodes with text, the sole surviving child when
        there is exactly one, and ``[name, children]`` otherwise.
        """
        rule_name = node.expr_name

        # Rules named like a selma'o are the cmavo: keep them as leaves.
        if rule_name and is_selmaho_expression(rule_name):
            return [rule_name, node.text]

        # Anonymous nodes that matched nothing but whitespace are noise.
        if rule_name == "" and node.text.strip() == "":
            return None

        # Keep only the children that produced something.
        kept = [child for child in visited_children if child]

        if not kept:
            # Leaf node: keep it only if it actually matched some text.
            return [rule_name, node.text] if len(node.text) > 0 else None

        if len(kept) == 1:
            only_child = kept[0]
            # A lone child with an empty name can simply take over this
            # node's name, since there is no sibling to confuse it with.
            if len(only_child) > 1 and only_child[0] == "":
                only_child[0] = rule_name
            return only_child

        # With several children an empty name cannot be replaced by ours,
        # so that layer is dropped and only its content is kept (this
        # happens with ZOI clauses, for example).
        flattened = [
            child[1] if len(child) > 1 and child[0] == "" else child
            for child in kept
        ]
        return [rule_name, flattened]

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
setuptools.setup(
1717
name=PACKAGE_NAME,
18-
version="0.9.0",
18+
version="0.10.0",
1919
author="Robin Lee Powell",
2020
author_email="rlpowell@digitalkingdom.org",
2121
description="A pure Python implementation of the lojban 'camxes' PEG parser.",

test.py

+1
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ def process_spec(input_spec, parser, json_transformer, morph_transformer):
5555

5656
out = morph = None
5757
try:
58+
print("text: " + text)
5859
parsed = parser.parse(text)
5960
out = transform_to_serial(parsed, json_transformer)
6061
morph = transform_to_serial(parsed, morph_transformer)

0 commit comments

Comments
 (0)