fix for merge conflict
[official-gcc.git] / libgo / go / ebnf / ebnf.go
blob8333f583093aebf2f7f7fbe8c3856f118a0624d0
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // A library for EBNF grammars. The input is text ([]byte) satisfying
6 // the following grammar (represented itself in EBNF):
7 //
8 // Production = name "=" Expression "." .
9 // Expression = Alternative { "|" Alternative } .
10 // Alternative = Term { Term } .
11 // Term = name | token [ "..." token ] | Group | Option | Repetition .
12 // Group = "(" Expression ")" .
13 // Option = "[" Expression "]" .
14 // Repetition = "{" Expression "}" .
16 // A name is a Go identifier, a token is a Go string, and comments
17 // and white space follow the same rules as for the Go language.
18 // Production names starting with an uppercase Unicode letter denote
19 // non-terminal productions (i.e., productions which allow white-space
20 // and comments between tokens); all other production names denote
21 // lexical productions.
23 package ebnf
25 import (
26 "go/scanner"
27 "go/token"
28 "os"
29 "unicode"
30 "utf8"
34 // ----------------------------------------------------------------------------
35 // Internal representation
37 type (
38 // An Expression node represents a production expression.
39 Expression interface {
40 // Pos is the position of the first character of the syntactic construct
41 Pos() token.Position
44 // An Alternative node represents a non-empty list of alternative expressions.
45 Alternative []Expression // x | y | z
47 // A Sequence node represents a non-empty list of sequential expressions.
48 Sequence []Expression // x y z
50 // A Name node represents a production name.
51 Name struct {
52 token.Position
53 String string
56 // A Token node represents a literal.
57 Token struct {
58 token.Position
59 String string
62 // A List node represents a range of characters.
63 Range struct {
64 Begin, End *Token // begin ... end
67 // A Group node represents a grouped expression.
68 Group struct {
69 token.Position
70 Body Expression // (body)
73 // An Option node represents an optional expression.
74 Option struct {
75 token.Position
76 Body Expression // [body]
79 // A Repetition node represents a repeated expression.
80 Repetition struct {
81 token.Position
82 Body Expression // {body}
85 // A Production node represents an EBNF production.
86 Production struct {
87 Name *Name
88 Expr Expression
91 // A Grammar is a set of EBNF productions. The map
92 // is indexed by production name.
94 Grammar map[string]*Production
98 func (x Alternative) Pos() token.Position {
99 return x[0].Pos() // the parser always generates non-empty Alternative
103 func (x Sequence) Pos() token.Position {
104 return x[0].Pos() // the parser always generates non-empty Sequences
108 func (x Range) Pos() token.Position { return x.Begin.Pos() }
111 func (p *Production) Pos() token.Position { return p.Name.Pos() }
114 // ----------------------------------------------------------------------------
115 // Grammar verification
117 func isLexical(name string) bool {
118 ch, _ := utf8.DecodeRuneInString(name)
119 return !unicode.IsUpper(ch)
123 type verifier struct {
124 scanner.ErrorVector
125 worklist []*Production
126 reached Grammar // set of productions reached from (and including) the root production
127 grammar Grammar
131 func (v *verifier) push(prod *Production) {
132 name := prod.Name.String
133 if _, found := v.reached[name]; !found {
134 v.worklist = append(v.worklist, prod)
135 v.reached[name] = prod
140 func (v *verifier) verifyChar(x *Token) int {
141 s := x.String
142 if utf8.RuneCountInString(s) != 1 {
143 v.Error(x.Pos(), "single char expected, found "+s)
144 return 0
146 ch, _ := utf8.DecodeRuneInString(s)
147 return ch
151 func (v *verifier) verifyExpr(expr Expression, lexical bool) {
152 switch x := expr.(type) {
153 case nil:
154 // empty expression
155 case Alternative:
156 for _, e := range x {
157 v.verifyExpr(e, lexical)
159 case Sequence:
160 for _, e := range x {
161 v.verifyExpr(e, lexical)
163 case *Name:
164 // a production with this name must exist;
165 // add it to the worklist if not yet processed
166 if prod, found := v.grammar[x.String]; found {
167 v.push(prod)
168 } else {
169 v.Error(x.Pos(), "missing production "+x.String)
171 // within a lexical production references
172 // to non-lexical productions are invalid
173 if lexical && !isLexical(x.String) {
174 v.Error(x.Pos(), "reference to non-lexical production "+x.String)
176 case *Token:
177 // nothing to do for now
178 case *Range:
179 i := v.verifyChar(x.Begin)
180 j := v.verifyChar(x.End)
181 if i >= j {
182 v.Error(x.Pos(), "decreasing character range")
184 case *Group:
185 v.verifyExpr(x.Body, lexical)
186 case *Option:
187 v.verifyExpr(x.Body, lexical)
188 case *Repetition:
189 v.verifyExpr(x.Body, lexical)
190 default:
191 panic("unreachable")
196 func (v *verifier) verify(grammar Grammar, start string) {
197 // find root production
198 root, found := grammar[start]
199 if !found {
200 var noPos token.Position
201 v.Error(noPos, "no start production "+start)
202 return
205 // initialize verifier
206 v.ErrorVector.Reset()
207 v.worklist = v.worklist[0:0]
208 v.reached = make(Grammar)
209 v.grammar = grammar
211 // work through the worklist
212 v.push(root)
213 for {
214 n := len(v.worklist) - 1
215 if n < 0 {
216 break
218 prod := v.worklist[n]
219 v.worklist = v.worklist[0:n]
220 v.verifyExpr(prod.Expr, isLexical(prod.Name.String))
223 // check if all productions were reached
224 if len(v.reached) < len(v.grammar) {
225 for name, prod := range v.grammar {
226 if _, found := v.reached[name]; !found {
227 v.Error(prod.Pos(), name+" is unreachable")
234 // Verify checks that:
235 // - all productions used are defined
236 // - all productions defined are used when beginning at start
237 // - lexical productions refer only to other lexical productions
239 func Verify(grammar Grammar, start string) os.Error {
240 var v verifier
241 v.verify(grammar, start)
242 return v.GetError(scanner.Sorted)