1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
14 // item represents a token or text string returned from the scanner.
16 typ itemType
// The type of this item.
17 pos Pos
// The starting position, in bytes, of this item in the input string.
18 val
string // The value of this item.
19 line
int // The line number at the start of this item.
22 func (i item
) String() string {
24 case i
.typ
== itemEOF
:
26 case i
.typ
== itemError
:
28 case i
.typ
> itemKeyword
:
29 return fmt
.Sprintf("<%s>", i
.val
)
31 return fmt
.Sprintf("%.10q...", i
.val
)
33 return fmt
.Sprintf("%q", i
.val
)
36 // itemType identifies the type of lex items.
40 itemError itemType
= iota // error occurred; value is text of error
41 itemBool
// boolean constant
42 itemChar
// printable ASCII character; grab bag for comma etc.
43 itemCharConstant
// character constant
44 itemComplex
// complex constant (1+2i); imaginary is just a number
45 itemColonEquals
// colon-equals (':=') introducing a declaration
47 itemField
// alphanumeric identifier starting with '.'
48 itemIdentifier
// alphanumeric identifier not starting with '.'
49 itemLeftDelim
// left action delimiter
50 itemLeftParen
// '(' inside action
51 itemNumber
// simple number, including imaginary
52 itemPipe
// pipe symbol
53 itemRawString
// raw quoted string (includes quotes)
54 itemRightDelim
// right action delimiter
55 itemRightParen
// ')' inside action
56 itemSpace
// run of spaces separating arguments
57 itemString
// quoted string (includes quotes)
58 itemText
// plain text
59 itemVariable
// variable starting with '$', such as '$' or '$1' or '$hello'
60 // Keywords appear after all the rest.
61 itemKeyword
// used only to delimit the keywords
62 itemBlock
// block keyword
63 itemDot
// the cursor, spelled '.'
64 itemDefine
// define keyword
65 itemElse
// else keyword
66 itemEnd
// end keyword
68 itemNil
// the untyped nil constant, easiest to treat as a keyword
69 itemRange
// range keyword
70 itemTemplate
// template keyword
71 itemWith
// with keyword
74 var key
= map[string]itemType
{
83 "template": itemTemplate
,
90 // If the action begins "{{- " rather than "{{", then all space/tab/newlines
91 // preceding the action are trimmed; conversely if it ends " -}}" the
92 // leading spaces are trimmed. This is done entirely in the lexer; the
93 // parser never sees it happen. We require an ASCII space to be
94 // present to avoid ambiguity with things like "{{-3}}". It reads
95 // better with the space present anyway. For simplicity, only ASCII
96 // space does the job.
98 spaceChars
= " \t\r\n" // These are the space characters defined by Go itself.
99 leftTrimMarker
= "- " // Attached to left delimiter, trims trailing spaces from preceding text.
100 rightTrimMarker
= " -" // Attached to right delimiter, trims leading spaces from following text.
101 trimMarkerLen
= Pos(len(leftTrimMarker
))
104 // stateFn represents the state of the scanner as a function that returns the next state.
105 type stateFn
func(*lexer
) stateFn
107 // lexer holds the state of the scanner.
109 name
string // the name of the input; used only for error reports
110 input
string // the string being scanned
111 leftDelim
string // start of action
112 rightDelim
string // end of action
113 pos Pos
// current position in the input
114 start Pos
// start position of this item
115 width Pos
// width of last rune read from input
116 items
chan item
// channel of scanned items
117 parenDepth
int // nesting depth of ( ) exprs
118 line
int // 1+number of newlines seen
121 // next returns the next rune in the input.
122 func (l
*lexer
) next() rune
{
123 if int(l
.pos
) >= len(l
.input
) {
127 r
, w
:= utf8
.DecodeRuneInString(l
.input
[l
.pos
:])
136 // peek returns but does not consume the next rune in the input.
137 func (l
*lexer
) peek() rune
{
143 // backup steps back one rune. Can only be called once per call of next.
144 func (l
*lexer
) backup() {
146 // Correct newline count.
147 if l
.width
== 1 && l
.input
[l
.pos
] == '\n' {
152 // emit passes an item back to the client.
153 func (l
*lexer
) emit(t itemType
) {
154 l
.items
<- item
{t
, l
.start
, l
.input
[l
.start
:l
.pos
], l
.line
}
155 // Some items contain text internally. If so, count their newlines.
157 case itemText
, itemRawString
, itemLeftDelim
, itemRightDelim
:
158 l
.line
+= strings
.Count(l
.input
[l
.start
:l
.pos
], "\n")
163 // ignore skips over the pending input before this point.
164 func (l
*lexer
) ignore() {
165 l
.line
+= strings
.Count(l
.input
[l
.start
:l
.pos
], "\n")
169 // accept consumes the next rune if it's from the valid set.
170 func (l
*lexer
) accept(valid
string) bool {
171 if strings
.ContainsRune(valid
, l
.next()) {
178 // acceptRun consumes a run of runes from the valid set.
179 func (l
*lexer
) acceptRun(valid
string) {
180 for strings
.ContainsRune(valid
, l
.next()) {
185 // errorf returns an error token and terminates the scan by passing
186 // back a nil pointer that will be the next state, terminating l.nextItem.
187 func (l
*lexer
) errorf(format
string, args
...interface{}) stateFn
{
188 l
.items
<- item
{itemError
, l
.start
, fmt
.Sprintf(format
, args
...), l
.line
}
192 // nextItem returns the next item from the input.
193 // Called by the parser, not in the lexing goroutine.
194 func (l
*lexer
) nextItem() item
{
198 // drain drains the output so the lexing goroutine will exit.
199 // Called by the parser, not in the lexing goroutine.
200 func (l
*lexer
) drain() {
205 // lex creates a new scanner for the input string.
206 func lex(name
, input
, left
, right
string) *lexer
{
218 items
: make(chan item
),
225 // run runs the state machine for the lexer.
226 func (l
*lexer
) run() {
227 for state
:= lexText
; state
!= nil; {
242 // lexText scans until an opening action delimiter, "{{".
243 func lexText(l
*lexer
) stateFn
{
245 if x
:= strings
.Index(l
.input
[l
.pos
:], l
.leftDelim
); x
>= 0 {
246 ldn
:= Pos(len(l
.leftDelim
))
249 if strings
.HasPrefix(l
.input
[l
.pos
+ldn
:], leftTrimMarker
) {
250 trimLength
= rightTrimLength(l
.input
[l
.start
:l
.pos
])
260 l
.pos
= Pos(len(l
.input
))
262 // Correctly reached EOF.
270 // rightTrimLength returns the length of the spaces at the end of the string.
271 func rightTrimLength(s
string) Pos
{
272 return Pos(len(s
) - len(strings
.TrimRight(s
, spaceChars
)))
275 // atRightDelim reports whether the lexer is at a right delimiter, possibly preceded by a trim marker.
276 func (l
*lexer
) atRightDelim() (delim
, trimSpaces
bool) {
277 if strings
.HasPrefix(l
.input
[l
.pos
:], l
.rightDelim
) {
280 // The right delim might have the marker before.
281 if strings
.HasPrefix(l
.input
[l
.pos
:], rightTrimMarker
) &&
282 strings
.HasPrefix(l
.input
[l
.pos
+trimMarkerLen
:], l
.rightDelim
) {
288 // leftTrimLength returns the length of the spaces at the beginning of the string.
289 func leftTrimLength(s
string) Pos
{
290 return Pos(len(s
) - len(strings
.TrimLeft(s
, spaceChars
)))
293 // lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker.
294 func lexLeftDelim(l
*lexer
) stateFn
{
295 l
.pos
+= Pos(len(l
.leftDelim
))
296 trimSpace
:= strings
.HasPrefix(l
.input
[l
.pos
:], leftTrimMarker
)
297 afterMarker
:= Pos(0)
299 afterMarker
= trimMarkerLen
301 if strings
.HasPrefix(l
.input
[l
.pos
+afterMarker
:], leftComment
) {
306 l
.emit(itemLeftDelim
)
310 return lexInsideAction
313 // lexComment scans a comment. The left comment marker is known to be present.
314 func lexComment(l
*lexer
) stateFn
{
315 l
.pos
+= Pos(len(leftComment
))
316 i
:= strings
.Index(l
.input
[l
.pos
:], rightComment
)
318 return l
.errorf("unclosed comment")
320 l
.pos
+= Pos(i
+ len(rightComment
))
321 delim
, trimSpace
:= l
.atRightDelim()
323 return l
.errorf("comment ends before closing delimiter")
326 l
.pos
+= trimMarkerLen
328 l
.pos
+= Pos(len(l
.rightDelim
))
330 l
.pos
+= leftTrimLength(l
.input
[l
.pos
:])
336 // lexRightDelim scans the right delimiter, which is known to be present, possibly with a trim marker.
337 func lexRightDelim(l
*lexer
) stateFn
{
338 trimSpace
:= strings
.HasPrefix(l
.input
[l
.pos
:], rightTrimMarker
)
340 l
.pos
+= trimMarkerLen
343 l
.pos
+= Pos(len(l
.rightDelim
))
344 l
.emit(itemRightDelim
)
346 l
.pos
+= leftTrimLength(l
.input
[l
.pos
:])
352 // lexInsideAction scans the elements inside action delimiters.
353 func lexInsideAction(l
*lexer
) stateFn
{
354 // Either number, quoted string, or identifier.
355 // Spaces separate arguments; runs of spaces turn into itemSpace.
356 // Pipe symbols separate and are emitted.
357 delim
, _
:= l
.atRightDelim()
359 if l
.parenDepth
== 0 {
362 return l
.errorf("unclosed left paren")
364 switch r
:= l
.next(); {
365 case r
== eof ||
isEndOfLine(r
):
366 return l
.errorf("unclosed action")
371 return l
.errorf("expected :=")
373 l
.emit(itemColonEquals
)
385 // special look-ahead for ".field" so we don't break l.backup().
386 if l
.pos
< Pos(len(l
.input
)) {
388 if r
< '0' ||
'9' < r
{
392 fallthrough // '.' can start a number.
393 case r
== '+' || r
== '-' ||
('0' <= r
&& r
<= '9'):
396 case isAlphaNumeric(r
):
400 l
.emit(itemLeftParen
)
403 l
.emit(itemRightParen
)
405 if l
.parenDepth
< 0 {
406 return l
.errorf("unexpected right paren %#U", r
)
408 case r
<= unicode
.MaxASCII
&& unicode
.IsPrint(r
):
410 return lexInsideAction
412 return l
.errorf("unrecognized character in action: %#U", r
)
414 return lexInsideAction
417 // lexSpace scans a run of space characters.
418 // One space has already been seen.
419 func lexSpace(l
*lexer
) stateFn
{
420 for isSpace(l
.peek()) {
424 return lexInsideAction
427 // lexIdentifier scans an alphanumeric.
428 func lexIdentifier(l
*lexer
) stateFn
{
431 switch r
:= l
.next(); {
432 case isAlphaNumeric(r
):
436 word
:= l
.input
[l
.start
:l
.pos
]
437 if !l
.atTerminator() {
438 return l
.errorf("bad character %#U", r
)
441 case key
[word
] > itemKeyword
:
445 case word
== "true", word
== "false":
448 l
.emit(itemIdentifier
)
453 return lexInsideAction
456 // lexField scans a field: .Alphanumeric.
457 // The . has been scanned.
458 func lexField(l
*lexer
) stateFn
{
459 return lexFieldOrVariable(l
, itemField
)
462 // lexVariable scans a Variable: $Alphanumeric.
463 // The $ has been scanned.
464 func lexVariable(l
*lexer
) stateFn
{
465 if l
.atTerminator() { // Nothing interesting follows -> "$".
467 return lexInsideAction
469 return lexFieldOrVariable(l
, itemVariable
)
472 // lexFieldOrVariable scans a field or variable: [.$]Alphanumeric.
473 // The . or $ has been scanned.
474 func lexFieldOrVariable(l
*lexer
, typ itemType
) stateFn
{
475 if l
.atTerminator() { // Nothing interesting follows -> "." or "$".
476 if typ
== itemVariable
{
481 return lexInsideAction
486 if !isAlphaNumeric(r
) {
491 if !l
.atTerminator() {
492 return l
.errorf("bad character %#U", r
)
495 return lexInsideAction
498 // atTerminator reports whether the input is at a valid termination character to
499 // appear after an identifier. Breaks .X.Y into two pieces. Also catches cases
500 // like "$x+2" not being acceptable without a space, in case we decide one
501 // day to implement arithmetic.
502 func (l
*lexer
) atTerminator() bool {
504 if isSpace(r
) ||
isEndOfLine(r
) {
508 case eof
, '.', ',', '|', ':', ')', '(':
511 // Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will
512 // succeed but should fail) but only in extremely rare cases caused by willfully
513 // bad choice of delimiter.
514 if rd
, _
:= utf8
.DecodeRuneInString(l
.rightDelim
); rd
== r
{
520 // lexChar scans a character constant. The initial quote is already
521 // scanned. Syntax checking is done by the parser.
522 func lexChar(l
*lexer
) stateFn
{
527 if r
:= l
.next(); r
!= eof
&& r
!= '\n' {
532 return l
.errorf("unterminated character constant")
537 l
.emit(itemCharConstant
)
538 return lexInsideAction
541 // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
542 // isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
543 // and "089" - but when it's wrong the input is invalid and the parser (via
544 // strconv) will notice.
545 func lexNumber(l
*lexer
) stateFn
{
547 return l
.errorf("bad number syntax: %q", l
.input
[l
.start
:l
.pos
])
549 if sign
:= l
.peek(); sign
== '+' || sign
== '-' {
550 // Complex: 1+2i. No spaces, must end in 'i'.
551 if !l
.scanNumber() || l
.input
[l
.pos
-1] != 'i' {
552 return l
.errorf("bad number syntax: %q", l
.input
[l
.start
:l
.pos
])
558 return lexInsideAction
561 func (l
*lexer
) scanNumber() bool {
562 // Optional leading sign.
565 digits
:= "0123456789"
566 if l
.accept("0") && l
.accept("xX") {
567 digits
= "0123456789abcdefABCDEF"
575 l
.acceptRun("0123456789")
579 // Next thing mustn't be alphanumeric.
580 if isAlphaNumeric(l
.peek()) {
587 // lexQuote scans a quoted string.
588 func lexQuote(l
*lexer
) stateFn
{
593 if r
:= l
.next(); r
!= eof
&& r
!= '\n' {
598 return l
.errorf("unterminated quoted string")
604 return lexInsideAction
607 // lexRawQuote scans a raw quoted string.
608 func lexRawQuote(l
*lexer
) stateFn
{
614 // Restore line number to location of opening quote.
615 // We will error out so it's ok just to overwrite the field.
617 return l
.errorf("unterminated raw quoted string")
622 l
.emit(itemRawString
)
623 return lexInsideAction
// isSpace reports whether r is a space character, meaning an ASCII space or
// a horizontal tab. Carriage return and newline are not spaces here; they
// are classified by isEndOfLine.
func isSpace(r rune) bool {
	return r == ' ' || r == '\t'
}
// isEndOfLine reports whether r is an end-of-line character, meaning a
// carriage return or a newline.
func isEndOfLine(r rune) bool {
	return r == '\r' || r == '\n'
}
// isAlphaNumeric reports whether r is an alphabetic, digit, or underscore.
// Letters and digits are classified with unicode.IsLetter and
// unicode.IsDigit, so non-ASCII letters and decimal digits also qualify.
func isAlphaNumeric(r rune) bool {
	return r == '_' || unicode.IsLetter(r) || unicode.IsDigit(r)
}