libgo: update to Go1.10rc2
[official-gcc.git] / libgo / go / text / template / parse / lex.go
blobe112cb7714a422a58fd824e751a8a569deda9e88
1 // Copyright 2011 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 package parse
7 import (
8 "fmt"
9 "strings"
10 "unicode"
11 "unicode/utf8"
14 // item represents a token or text string returned from the scanner.
15 type item struct {
16 typ itemType // The type of this item.
17 pos Pos // The starting position, in bytes, of this item in the input string.
18 val string // The value of this item.
19 line int // The line number at the start of this item.
22 func (i item) String() string {
23 switch {
24 case i.typ == itemEOF:
25 return "EOF"
26 case i.typ == itemError:
27 return i.val
28 case i.typ > itemKeyword:
29 return fmt.Sprintf("<%s>", i.val)
30 case len(i.val) > 10:
31 return fmt.Sprintf("%.10q...", i.val)
33 return fmt.Sprintf("%q", i.val)
// itemType is the integer tag that classifies an item produced by the lexer.
type itemType int
39 const (
40 itemError itemType = iota // error occurred; value is text of error
41 itemBool // boolean constant
42 itemChar // printable ASCII character; grab bag for comma etc.
43 itemCharConstant // character constant
44 itemComplex // complex constant (1+2i); imaginary is just a number
45 itemColonEquals // colon-equals (':=') introducing a declaration
46 itemEOF
47 itemField // alphanumeric identifier starting with '.'
48 itemIdentifier // alphanumeric identifier not starting with '.'
49 itemLeftDelim // left action delimiter
50 itemLeftParen // '(' inside action
51 itemNumber // simple number, including imaginary
52 itemPipe // pipe symbol
53 itemRawString // raw quoted string (includes quotes)
54 itemRightDelim // right action delimiter
55 itemRightParen // ')' inside action
56 itemSpace // run of spaces separating arguments
57 itemString // quoted string (includes quotes)
58 itemText // plain text
59 itemVariable // variable starting with '$', such as '$' or '$1' or '$hello'
60 // Keywords appear after all the rest.
61 itemKeyword // used only to delimit the keywords
62 itemBlock // block keyword
63 itemDot // the cursor, spelled '.'
64 itemDefine // define keyword
65 itemElse // else keyword
66 itemEnd // end keyword
67 itemIf // if keyword
68 itemNil // the untyped nil constant, easiest to treat as a keyword
69 itemRange // range keyword
70 itemTemplate // template keyword
71 itemWith // with keyword
74 var key = map[string]itemType{
75 ".": itemDot,
76 "block": itemBlock,
77 "define": itemDefine,
78 "else": itemElse,
79 "end": itemEnd,
80 "if": itemIf,
81 "range": itemRange,
82 "nil": itemNil,
83 "template": itemTemplate,
84 "with": itemWith,
// eof is the sentinel returned by (*lexer).next once the input is exhausted.
// It is negative so that it cannot collide with a decoded rune.
const eof = -1
89 // Trimming spaces.
90 // If the action begins "{{- " rather than "{{", then all space/tab/newlines
91 // preceding the action are trimmed; conversely if it ends " -}}" the
92 // leading spaces are trimmed. This is done entirely in the lexer; the
93 // parser never sees it happen. We require an ASCII space to be
94 // present to avoid ambiguity with things like "{{-3}}". It reads
95 // better with the space present anyway. For simplicity, only ASCII
96 // space does the job.
97 const (
98 spaceChars = " \t\r\n" // These are the space characters defined by Go itself.
99 leftTrimMarker = "- " // Attached to left delimiter, trims trailing spaces from preceding text.
100 rightTrimMarker = " -" // Attached to right delimiter, trims leading spaces from following text.
101 trimMarkerLen = Pos(len(leftTrimMarker))
104 // stateFn represents the state of the scanner as a function that returns the next state.
105 type stateFn func(*lexer) stateFn
107 // lexer holds the state of the scanner.
108 type lexer struct {
109 name string // the name of the input; used only for error reports
110 input string // the string being scanned
111 leftDelim string // start of action
112 rightDelim string // end of action
113 pos Pos // current position in the input
114 start Pos // start position of this item
115 width Pos // width of last rune read from input
116 items chan item // channel of scanned items
117 parenDepth int // nesting depth of ( ) exprs
118 line int // 1+number of newlines seen
121 // next returns the next rune in the input.
122 func (l *lexer) next() rune {
123 if int(l.pos) >= len(l.input) {
124 l.width = 0
125 return eof
127 r, w := utf8.DecodeRuneInString(l.input[l.pos:])
128 l.width = Pos(w)
129 l.pos += l.width
130 if r == '\n' {
131 l.line++
133 return r
136 // peek returns but does not consume the next rune in the input.
137 func (l *lexer) peek() rune {
138 r := l.next()
139 l.backup()
140 return r
143 // backup steps back one rune. Can only be called once per call of next.
144 func (l *lexer) backup() {
145 l.pos -= l.width
146 // Correct newline count.
147 if l.width == 1 && l.input[l.pos] == '\n' {
148 l.line--
152 // emit passes an item back to the client.
153 func (l *lexer) emit(t itemType) {
154 l.items <- item{t, l.start, l.input[l.start:l.pos], l.line}
155 // Some items contain text internally. If so, count their newlines.
156 switch t {
157 case itemText, itemRawString, itemLeftDelim, itemRightDelim:
158 l.line += strings.Count(l.input[l.start:l.pos], "\n")
160 l.start = l.pos
163 // ignore skips over the pending input before this point.
164 func (l *lexer) ignore() {
165 l.line += strings.Count(l.input[l.start:l.pos], "\n")
166 l.start = l.pos
169 // accept consumes the next rune if it's from the valid set.
170 func (l *lexer) accept(valid string) bool {
171 if strings.ContainsRune(valid, l.next()) {
172 return true
174 l.backup()
175 return false
178 // acceptRun consumes a run of runes from the valid set.
179 func (l *lexer) acceptRun(valid string) {
180 for strings.ContainsRune(valid, l.next()) {
182 l.backup()
185 // errorf returns an error token and terminates the scan by passing
186 // back a nil pointer that will be the next state, terminating l.nextItem.
187 func (l *lexer) errorf(format string, args ...interface{}) stateFn {
188 l.items <- item{itemError, l.start, fmt.Sprintf(format, args...), l.line}
189 return nil
192 // nextItem returns the next item from the input.
193 // Called by the parser, not in the lexing goroutine.
194 func (l *lexer) nextItem() item {
195 return <-l.items
198 // drain drains the output so the lexing goroutine will exit.
199 // Called by the parser, not in the lexing goroutine.
200 func (l *lexer) drain() {
201 for range l.items {
205 // lex creates a new scanner for the input string.
206 func lex(name, input, left, right string) *lexer {
207 if left == "" {
208 left = leftDelim
210 if right == "" {
211 right = rightDelim
213 l := &lexer{
214 name: name,
215 input: input,
216 leftDelim: left,
217 rightDelim: right,
218 items: make(chan item),
219 line: 1,
221 go l.run()
222 return l
225 // run runs the state machine for the lexer.
226 func (l *lexer) run() {
227 for state := lexText; state != nil; {
228 state = state(l)
230 close(l.items)
// state functions

// Default action delimiters and the comment markers recognised inside an action.
const (
	leftDelim    = "{{"
	rightDelim   = "}}"
	leftComment  = "/*"
	rightComment = "*/"
)
242 // lexText scans until an opening action delimiter, "{{".
243 func lexText(l *lexer) stateFn {
244 l.width = 0
245 if x := strings.Index(l.input[l.pos:], l.leftDelim); x >= 0 {
246 ldn := Pos(len(l.leftDelim))
247 l.pos += Pos(x)
248 trimLength := Pos(0)
249 if strings.HasPrefix(l.input[l.pos+ldn:], leftTrimMarker) {
250 trimLength = rightTrimLength(l.input[l.start:l.pos])
252 l.pos -= trimLength
253 if l.pos > l.start {
254 l.emit(itemText)
256 l.pos += trimLength
257 l.ignore()
258 return lexLeftDelim
259 } else {
260 l.pos = Pos(len(l.input))
262 // Correctly reached EOF.
263 if l.pos > l.start {
264 l.emit(itemText)
266 l.emit(itemEOF)
267 return nil
270 // rightTrimLength returns the length of the spaces at the end of the string.
271 func rightTrimLength(s string) Pos {
272 return Pos(len(s) - len(strings.TrimRight(s, spaceChars)))
275 // atRightDelim reports whether the lexer is at a right delimiter, possibly preceded by a trim marker.
276 func (l *lexer) atRightDelim() (delim, trimSpaces bool) {
277 if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
278 return true, false
280 // The right delim might have the marker before.
281 if strings.HasPrefix(l.input[l.pos:], rightTrimMarker) &&
282 strings.HasPrefix(l.input[l.pos+trimMarkerLen:], l.rightDelim) {
283 return true, true
285 return false, false
288 // leftTrimLength returns the length of the spaces at the beginning of the string.
289 func leftTrimLength(s string) Pos {
290 return Pos(len(s) - len(strings.TrimLeft(s, spaceChars)))
293 // lexLeftDelim scans the left delimiter, which is known to be present, possibly with a trim marker.
294 func lexLeftDelim(l *lexer) stateFn {
295 l.pos += Pos(len(l.leftDelim))
296 trimSpace := strings.HasPrefix(l.input[l.pos:], leftTrimMarker)
297 afterMarker := Pos(0)
298 if trimSpace {
299 afterMarker = trimMarkerLen
301 if strings.HasPrefix(l.input[l.pos+afterMarker:], leftComment) {
302 l.pos += afterMarker
303 l.ignore()
304 return lexComment
306 l.emit(itemLeftDelim)
307 l.pos += afterMarker
308 l.ignore()
309 l.parenDepth = 0
310 return lexInsideAction
313 // lexComment scans a comment. The left comment marker is known to be present.
314 func lexComment(l *lexer) stateFn {
315 l.pos += Pos(len(leftComment))
316 i := strings.Index(l.input[l.pos:], rightComment)
317 if i < 0 {
318 return l.errorf("unclosed comment")
320 l.pos += Pos(i + len(rightComment))
321 delim, trimSpace := l.atRightDelim()
322 if !delim {
323 return l.errorf("comment ends before closing delimiter")
325 if trimSpace {
326 l.pos += trimMarkerLen
328 l.pos += Pos(len(l.rightDelim))
329 if trimSpace {
330 l.pos += leftTrimLength(l.input[l.pos:])
332 l.ignore()
333 return lexText
336 // lexRightDelim scans the right delimiter, which is known to be present, possibly with a trim marker.
337 func lexRightDelim(l *lexer) stateFn {
338 trimSpace := strings.HasPrefix(l.input[l.pos:], rightTrimMarker)
339 if trimSpace {
340 l.pos += trimMarkerLen
341 l.ignore()
343 l.pos += Pos(len(l.rightDelim))
344 l.emit(itemRightDelim)
345 if trimSpace {
346 l.pos += leftTrimLength(l.input[l.pos:])
347 l.ignore()
349 return lexText
352 // lexInsideAction scans the elements inside action delimiters.
353 func lexInsideAction(l *lexer) stateFn {
354 // Either number, quoted string, or identifier.
355 // Spaces separate arguments; runs of spaces turn into itemSpace.
356 // Pipe symbols separate and are emitted.
357 delim, _ := l.atRightDelim()
358 if delim {
359 if l.parenDepth == 0 {
360 return lexRightDelim
362 return l.errorf("unclosed left paren")
364 switch r := l.next(); {
365 case r == eof || isEndOfLine(r):
366 return l.errorf("unclosed action")
367 case isSpace(r):
368 return lexSpace
369 case r == ':':
370 if l.next() != '=' {
371 return l.errorf("expected :=")
373 l.emit(itemColonEquals)
374 case r == '|':
375 l.emit(itemPipe)
376 case r == '"':
377 return lexQuote
378 case r == '`':
379 return lexRawQuote
380 case r == '$':
381 return lexVariable
382 case r == '\'':
383 return lexChar
384 case r == '.':
385 // special look-ahead for ".field" so we don't break l.backup().
386 if l.pos < Pos(len(l.input)) {
387 r := l.input[l.pos]
388 if r < '0' || '9' < r {
389 return lexField
392 fallthrough // '.' can start a number.
393 case r == '+' || r == '-' || ('0' <= r && r <= '9'):
394 l.backup()
395 return lexNumber
396 case isAlphaNumeric(r):
397 l.backup()
398 return lexIdentifier
399 case r == '(':
400 l.emit(itemLeftParen)
401 l.parenDepth++
402 case r == ')':
403 l.emit(itemRightParen)
404 l.parenDepth--
405 if l.parenDepth < 0 {
406 return l.errorf("unexpected right paren %#U", r)
408 case r <= unicode.MaxASCII && unicode.IsPrint(r):
409 l.emit(itemChar)
410 return lexInsideAction
411 default:
412 return l.errorf("unrecognized character in action: %#U", r)
414 return lexInsideAction
417 // lexSpace scans a run of space characters.
418 // One space has already been seen.
419 func lexSpace(l *lexer) stateFn {
420 for isSpace(l.peek()) {
421 l.next()
423 l.emit(itemSpace)
424 return lexInsideAction
427 // lexIdentifier scans an alphanumeric.
428 func lexIdentifier(l *lexer) stateFn {
429 Loop:
430 for {
431 switch r := l.next(); {
432 case isAlphaNumeric(r):
433 // absorb.
434 default:
435 l.backup()
436 word := l.input[l.start:l.pos]
437 if !l.atTerminator() {
438 return l.errorf("bad character %#U", r)
440 switch {
441 case key[word] > itemKeyword:
442 l.emit(key[word])
443 case word[0] == '.':
444 l.emit(itemField)
445 case word == "true", word == "false":
446 l.emit(itemBool)
447 default:
448 l.emit(itemIdentifier)
450 break Loop
453 return lexInsideAction
456 // lexField scans a field: .Alphanumeric.
457 // The . has been scanned.
458 func lexField(l *lexer) stateFn {
459 return lexFieldOrVariable(l, itemField)
462 // lexVariable scans a Variable: $Alphanumeric.
463 // The $ has been scanned.
464 func lexVariable(l *lexer) stateFn {
465 if l.atTerminator() { // Nothing interesting follows -> "$".
466 l.emit(itemVariable)
467 return lexInsideAction
469 return lexFieldOrVariable(l, itemVariable)
472 // lexVariable scans a field or variable: [.$]Alphanumeric.
473 // The . or $ has been scanned.
474 func lexFieldOrVariable(l *lexer, typ itemType) stateFn {
475 if l.atTerminator() { // Nothing interesting follows -> "." or "$".
476 if typ == itemVariable {
477 l.emit(itemVariable)
478 } else {
479 l.emit(itemDot)
481 return lexInsideAction
483 var r rune
484 for {
485 r = l.next()
486 if !isAlphaNumeric(r) {
487 l.backup()
488 break
491 if !l.atTerminator() {
492 return l.errorf("bad character %#U", r)
494 l.emit(typ)
495 return lexInsideAction
498 // atTerminator reports whether the input is at valid termination character to
499 // appear after an identifier. Breaks .X.Y into two pieces. Also catches cases
500 // like "$x+2" not being acceptable without a space, in case we decide one
501 // day to implement arithmetic.
502 func (l *lexer) atTerminator() bool {
503 r := l.peek()
504 if isSpace(r) || isEndOfLine(r) {
505 return true
507 switch r {
508 case eof, '.', ',', '|', ':', ')', '(':
509 return true
511 // Does r start the delimiter? This can be ambiguous (with delim=="//", $x/2 will
512 // succeed but should fail) but only in extremely rare cases caused by willfully
513 // bad choice of delimiter.
514 if rd, _ := utf8.DecodeRuneInString(l.rightDelim); rd == r {
515 return true
517 return false
520 // lexChar scans a character constant. The initial quote is already
521 // scanned. Syntax checking is done by the parser.
522 func lexChar(l *lexer) stateFn {
523 Loop:
524 for {
525 switch l.next() {
526 case '\\':
527 if r := l.next(); r != eof && r != '\n' {
528 break
530 fallthrough
531 case eof, '\n':
532 return l.errorf("unterminated character constant")
533 case '\'':
534 break Loop
537 l.emit(itemCharConstant)
538 return lexInsideAction
541 // lexNumber scans a number: decimal, octal, hex, float, or imaginary. This
542 // isn't a perfect number scanner - for instance it accepts "." and "0x0.2"
543 // and "089" - but when it's wrong the input is invalid and the parser (via
544 // strconv) will notice.
545 func lexNumber(l *lexer) stateFn {
546 if !l.scanNumber() {
547 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
549 if sign := l.peek(); sign == '+' || sign == '-' {
550 // Complex: 1+2i. No spaces, must end in 'i'.
551 if !l.scanNumber() || l.input[l.pos-1] != 'i' {
552 return l.errorf("bad number syntax: %q", l.input[l.start:l.pos])
554 l.emit(itemComplex)
555 } else {
556 l.emit(itemNumber)
558 return lexInsideAction
561 func (l *lexer) scanNumber() bool {
562 // Optional leading sign.
563 l.accept("+-")
564 // Is it hex?
565 digits := "0123456789"
566 if l.accept("0") && l.accept("xX") {
567 digits = "0123456789abcdefABCDEF"
569 l.acceptRun(digits)
570 if l.accept(".") {
571 l.acceptRun(digits)
573 if l.accept("eE") {
574 l.accept("+-")
575 l.acceptRun("0123456789")
577 // Is it imaginary?
578 l.accept("i")
579 // Next thing mustn't be alphanumeric.
580 if isAlphaNumeric(l.peek()) {
581 l.next()
582 return false
584 return true
587 // lexQuote scans a quoted string.
588 func lexQuote(l *lexer) stateFn {
589 Loop:
590 for {
591 switch l.next() {
592 case '\\':
593 if r := l.next(); r != eof && r != '\n' {
594 break
596 fallthrough
597 case eof, '\n':
598 return l.errorf("unterminated quoted string")
599 case '"':
600 break Loop
603 l.emit(itemString)
604 return lexInsideAction
607 // lexRawQuote scans a raw quoted string.
608 func lexRawQuote(l *lexer) stateFn {
609 startLine := l.line
610 Loop:
611 for {
612 switch l.next() {
613 case eof:
614 // Restore line number to location of opening quote.
615 // We will error out so it's ok just to overwrite the field.
616 l.line = startLine
617 return l.errorf("unterminated raw quoted string")
618 case '`':
619 break Loop
622 l.emit(itemRawString)
623 return lexInsideAction
// isSpace reports whether r is a space or a tab.
func isSpace(r rune) bool {
	switch r {
	case ' ', '\t':
		return true
	}
	return false
}
// isEndOfLine reports whether r is a carriage return or a line feed.
func isEndOfLine(r rune) bool {
	switch r {
	case '\r', '\n':
		return true
	}
	return false
}
// isAlphaNumeric reports whether r is an underscore, a Unicode letter or a
// Unicode digit.
func isAlphaNumeric(r rune) bool {
	if r == '_' {
		return true
	}
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}