-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.go
122 lines (115 loc) · 2.65 KB
/
parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
package cfg
import (
"fmt"
"github.com/0x51-dev/upeg/parser"
"github.com/0x51-dev/upeg/parser/op"
)
var (
grammar = op.Capture{
Name: "CFG",
Value: op.And{
op.ZeroOrMore{Value: op.EndOfLine{}},
op.OneOrMore{
Value: productionRule,
},
},
}
nonTerminal = op.Capture{
Name: "NonTerminal",
Value: op.RuneRange{Min: 'A', Max: 'Z'},
}
terminal = op.Capture{
Name: "Terminal",
Value: op.Or{
op.RuneRange{Min: 'a', Max: 'z'},
'(', ')', '[', ']',
},
}
epsilon = op.Capture{
Name: "Epsilon",
Value: 'ε',
}
expression = op.Capture{
Name: "Expression",
Value: op.Or{op.OneOrMore{Value: op.Or{terminal, nonTerminal}}, epsilon},
}
productionRule = op.Capture{
Name: "ProductionRule",
Value: op.And{
nonTerminal,
op.Or{'→', "->"},
expression,
op.ZeroOrMore{Value: op.And{'|', expression}},
op.EndOfLine{},
},
}
)
func parseGrammar(n *parser.Node) (*CFG, error) {
if n.Name != "CFG" {
return nil, fmt.Errorf("expected CFG, got %s", n.Name)
}
var start Variable
vm := make(map[Variable]struct{})
tm := make(map[Terminal]struct{})
var productions []Production
for _, n := range n.Children() {
if n.Name != "ProductionRule" {
return nil, fmt.Errorf("expected ProductionRule, got %s", n.Name)
}
if len(n.Children()) < 2 {
return nil, fmt.Errorf("expected at least 2 children, got %d", len(n.Children()))
}
v := Variable(n.Children()[0].Value())
if _, ok := vm[v]; !ok {
if start == "" {
// First non-terminal is the start symbol.
start = v
}
vm[v] = struct{}{}
}
for _, n := range n.Children()[1:] {
if n.Name != "Expression" {
return nil, fmt.Errorf("expected Expression, got %s", n.Name)
}
var ts []Beta
for _, n := range n.Children() {
switch n.Name {
case "Terminal":
t := Terminal(n.Value())
ts = append(ts, t)
if _, ok := tm[t]; !ok {
tm[t] = struct{}{}
}
case "NonTerminal":
ts = append(ts, Variable(n.Value()))
case "Epsilon":
ts = append(ts, Epsilon)
default:
return nil, fmt.Errorf("expected Terminal, NonTerminal, or Epsilon, got %s", n.Name)
}
}
productions = append(productions, Production{A: v, B: ts})
}
}
var variables []Variable
for v := range vm {
variables = append(variables, v)
}
var terminals []Terminal
for t := range tm {
terminals = append(terminals, t)
}
return New(variables, terminals, productions, start)
}
func Parse(input string) (*CFG, error) {
p, err := parser.New([]rune(input))
if err != nil {
return nil, err
}
p.SetIgnoreList([]any{' ', '\t'})
n, err := p.Parse(op.And{grammar, op.EOF{}})
if err != nil {
return nil, err
}
return parseGrammar(n)
}