1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // A library for EBNF grammars. The input is text ([]byte) satisfying
6 // the following grammar (represented itself in EBNF):
8 // Production = name "=" Expression "." .
9 // Expression = Alternative { "|" Alternative } .
10 // Alternative = Term { Term } .
11 // Term = name | token [ "..." token ] | Group | Option | Repetition .
12 // Group = "(" Expression ")" .
13 // Option = "[" Expression "]" .
14 // Repetition = "{" Expression "}" .
16 // A name is a Go identifier, a token is a Go string, and comments
17 // and white space follow the same rules as for the Go language.
18 // Production names starting with an uppercase Unicode letter denote
19 // non-terminal productions (i.e., productions which allow white-space
20 // and comments between tokens); all other production names denote
21 // lexical productions.
34 // ----------------------------------------------------------------------------
35 // Internal representation
38 // An Expression node represents a production expression.
39 Expression
interface {
40 // Pos is the position of the first character of the syntactic construct
44 // An Alternative node represents a non-empty list of alternative expressions.
45 Alternative
[]Expression
// x | y | z
47 // A Sequence node represents a non-empty list of sequential expressions.
48 Sequence
[]Expression
// x y z
50 // A Name node represents a production name.
56 // A Token node represents a literal.
62 // A List node represents a range of characters.
64 Begin
, End
*Token
// begin ... end
67 // A Group node represents a grouped expression.
70 Body Expression
// (body)
73 // An Option node represents an optional expression.
76 Body Expression
// [body]
79 // A Repetition node represents a repeated expression.
82 Body Expression
// {body}
85 // A Production node represents an EBNF production.
91 // A Grammar is a set of EBNF productions. The map
92 // is indexed by production name.
94 Grammar
map[string]*Production
98 func (x Alternative
) Pos() token
.Position
{
99 return x
[0].Pos() // the parser always generates non-empty Alternative
103 func (x Sequence
) Pos() token
.Position
{
104 return x
[0].Pos() // the parser always generates non-empty Sequences
108 func (x Range
) Pos() token
.Position
{ return x
.Begin
.Pos() }
111 func (p
*Production
) Pos() token
.Position
{ return p
.Name
.Pos() }
114 // ----------------------------------------------------------------------------
115 // Grammar verification
117 func isLexical(name
string) bool {
118 ch
, _
:= utf8
.DecodeRuneInString(name
)
119 return !unicode
.IsUpper(ch
)
123 type verifier
struct {
125 worklist
[]*Production
126 reached Grammar
// set of productions reached from (and including) the root production
131 func (v
*verifier
) push(prod
*Production
) {
132 name
:= prod
.Name
.String
133 if _
, found
:= v
.reached
[name
]; !found
{
134 v
.worklist
= append(v
.worklist
, prod
)
135 v
.reached
[name
] = prod
140 func (v
*verifier
) verifyChar(x
*Token
) int {
142 if utf8
.RuneCountInString(s
) != 1 {
143 v
.Error(x
.Pos(), "single char expected, found "+s
)
146 ch
, _
:= utf8
.DecodeRuneInString(s
)
151 func (v
*verifier
) verifyExpr(expr Expression
, lexical
bool) {
152 switch x
:= expr
.(type) {
156 for _
, e
:= range x
{
157 v
.verifyExpr(e
, lexical
)
160 for _
, e
:= range x
{
161 v
.verifyExpr(e
, lexical
)
164 // a production with this name must exist;
165 // add it to the worklist if not yet processed
166 if prod
, found
:= v
.grammar
[x
.String
]; found
{
169 v
.Error(x
.Pos(), "missing production "+x
.String
)
171 // within a lexical production references
172 // to non-lexical productions are invalid
173 if lexical
&& !isLexical(x
.String
) {
174 v
.Error(x
.Pos(), "reference to non-lexical production "+x
.String
)
177 // nothing to do for now
179 i
:= v
.verifyChar(x
.Begin
)
180 j
:= v
.verifyChar(x
.End
)
182 v
.Error(x
.Pos(), "decreasing character range")
185 v
.verifyExpr(x
.Body
, lexical
)
187 v
.verifyExpr(x
.Body
, lexical
)
189 v
.verifyExpr(x
.Body
, lexical
)
196 func (v
*verifier
) verify(grammar Grammar
, start
string) {
197 // find root production
198 root
, found
:= grammar
[start
]
200 var noPos token
.Position
201 v
.Error(noPos
, "no start production "+start
)
205 // initialize verifier
206 v
.ErrorVector
.Reset()
207 v
.worklist
= v
.worklist
[0:0]
208 v
.reached
= make(Grammar
)
211 // work through the worklist
214 n
:= len(v
.worklist
) - 1
218 prod
:= v
.worklist
[n
]
219 v
.worklist
= v
.worklist
[0:n
]
220 v
.verifyExpr(prod
.Expr
, isLexical(prod
.Name
.String
))
223 // check if all productions were reached
224 if len(v
.reached
) < len(v
.grammar
) {
225 for name
, prod
:= range v
.grammar
{
226 if _
, found
:= v
.reached
[name
]; !found
{
227 v
.Error(prod
.Pos(), name
+" is unreachable")
234 // Verify checks that:
235 // - all productions used are defined
236 // - all productions defined are used when beginning at start
237 // - lexical productions refer only to other lexical productions
239 func Verify(grammar Grammar
, start
string) os
.Error
{
241 v
.verify(grammar
, start
)
242 return v
.GetError(scanner
.Sorted
)