libgo/go/encoding/xml/xml.go

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 // Package xml implements a simple XML 1.0 parser that
   6 // understands XML name spaces.
   7 package xml
   8
   9 // References:
  10 //    Annotated XML spec: https://www.xml.com/axml/testaxml.htm
  11 //    XML name spaces: https://www.w3.org/TR/REC-xml-names/
  12
  13 // TODO(rsc):
  14 //      Test error handling.
  15
  16 import (
  17         "bufio"
  18         "bytes"
  19         "errors"
  20         "fmt"
  21         "io"
  22         "strconv"
  23         "strings"
  24         "unicode"
  25         "unicode/utf8"
  26 )
  27
  28 // A SyntaxError represents a syntax error in the XML input stream.
  29 type SyntaxError struct {
  30         Msg  string
  31         Line int
  32 }
  33
  34 func (e *SyntaxError) Error() string {
  35         return "XML syntax error on line " + strconv.Itoa(e.Line) + ": " + e.Msg
  36 }
  37
  38 // A Name represents an XML name (Local) annotated
  39 // with a name space identifier (Space).
  40 // In tokens returned by Decoder.Token, the Space identifier
  41 // is given as a canonical URL, not the short prefix used
  42 // in the document being parsed.
  43 type Name struct {
  44         Space, Local string
  45 }
  46
  47 // An Attr represents an attribute in an XML element (Name=Value).
  48 type Attr struct {
  49         Name  Name
  50         Value string
  51 }
  52
  53 // A Token is an interface holding one of the token types:
  54 // StartElement, EndElement, CharData, Comment, ProcInst, or Directive.
  55 type Token interface{}
  56
  57 // A StartElement represents an XML start element.
  58 type StartElement struct {
  59         Name Name
  60         Attr []Attr
  61 }
  62
  63 // Copy creates a new copy of StartElement.
  64 func (e StartElement) Copy() StartElement {
  65         attrs := make([]Attr, len(e.Attr))
  66         copy(attrs, e.Attr)
  67         e.Attr = attrs
  68         return e
  69 }
  70
  71 // End returns the corresponding XML end element.
  72 func (e StartElement) End() EndElement {
  73         return EndElement{e.Name}
  74 }
  75
  76 // An EndElement represents an XML end element.
  77 type EndElement struct {
  78         Name Name
  79 }
  80
  81 // A CharData represents XML character data (raw text),
  82 // in which XML escape sequences have been replaced by
  83 // the characters they represent.
  84 type CharData []byte
  85
  86 func makeCopy(b []byte) []byte {
  87         b1 := make([]byte, len(b))
  88         copy(b1, b)
  89         return b1
  90 }
  91
  92 // Copy creates a new copy of CharData.
  93 func (c CharData) Copy() CharData { return CharData(makeCopy(c)) }
  94
  95 // A Comment represents an XML comment of the form <!--comment-->.
  96 // The bytes do not include the <!-- and --> comment markers.
  97 type Comment []byte
  98
  99 // Copy creates a new copy of Comment.
 100 func (c Comment) Copy() Comment { return Comment(makeCopy(c)) }
 101
 102 // A ProcInst represents an XML processing instruction of the form <?target inst?>
 103 type ProcInst struct {
 104         Target string
 105         Inst   []byte
 106 }
 107
 108 // Copy creates a new copy of ProcInst.
 109 func (p ProcInst) Copy() ProcInst {
 110         p.Inst = makeCopy(p.Inst)
 111         return p
 112 }
 113
 114 // A Directive represents an XML directive of the form <!text>.
 115 // The bytes do not include the <! and > markers.
 116 type Directive []byte
 117
 118 // Copy creates a new copy of Directive.
 119 func (d Directive) Copy() Directive { return Directive(makeCopy(d)) }
 120
 121 // CopyToken returns a copy of a Token.
 122 func CopyToken(t Token) Token {
 123         switch v := t.(type) {
 124         case CharData:
 125                 return v.Copy()
 126         case Comment:
 127                 return v.Copy()
 128         case Directive:
 129                 return v.Copy()
 130         case ProcInst:
 131                 return v.Copy()
 132         case StartElement:
 133                 return v.Copy()
 134         }
 135         return t
 136 }
 137
 138 // A TokenReader is anything that can decode a stream of XML tokens, including a
 139 // Decoder.
 140 //
 141 // When Token encounters an error or end-of-file condition after successfully
 142 // reading a token, it returns the token. It may return the (non-nil) error from
 143 // the same call or return the error (and a nil token) from a subsequent call.
 144 // An instance of this general case is that a TokenReader returning a non-nil
 145 // token at the end of the token stream may return either io.EOF or a nil error.
 146 // The next Read should return nil, io.EOF.
 147 //
 148 // Implementations of Token are discouraged from returning a nil token with a
 149 // nil error. Callers should treat a return of nil, nil as indicating that
 150 // nothing happened; in particular it does not indicate EOF.
 151 type TokenReader interface {
 152         Token() (Token, error)
 153 }
 154
 155 // A Decoder represents an XML parser reading a particular input stream.
 156 // The parser assumes that its input is encoded in UTF-8.
 157 type Decoder struct {
 158         // Strict defaults to true, enforcing the requirements
 159         // of the XML specification.
 160         // If set to false, the parser allows input containing common
 161         // mistakes:
 162         //      * If an element is missing an end tag, the parser invents
 163         //        end tags as necessary to keep the return values from Token
 164         //        properly balanced.
 165         //      * In attribute values and character data, unknown or malformed
 166         //        character entities (sequences beginning with &) are left alone.
 167         //
 168         // Setting:
 169         //
 170         //      d.Strict = false
 171         //      d.AutoClose = xml.HTMLAutoClose
 172         //      d.Entity = xml.HTMLEntity
 173         //
 174         // creates a parser that can handle typical HTML.
 175         //
 176         // Strict mode does not enforce the requirements of the XML name spaces TR.
 177         // In particular it does not reject name space tags using undefined prefixes.
 178         // Such tags are recorded with the unknown prefix as the name space URL.
 179         Strict bool
 180
 181         // When Strict == false, AutoClose indicates a set of elements to
 182         // consider closed immediately after they are opened, regardless
 183         // of whether an end element is present.
 184         AutoClose []string
 185
 186         // Entity can be used to map non-standard entity names to string replacements.
 187         // The parser behaves as if these standard mappings are present in the map,
 188         // regardless of the actual map content:
 189         //
 190         //      "lt": "<",
 191         //      "gt": ">",
 192         //      "amp": "&",
 193         //      "apos": "'",
 194         //      "quot": `"`,
 195         Entity map[string]string
 196
 197         // CharsetReader, if non-nil, defines a function to generate
 198         // charset-conversion readers, converting from the provided
 199         // non-UTF-8 charset into UTF-8. If CharsetReader is nil or
 200         // returns an error, parsing stops with an error. One of the
 201         // CharsetReader's result values must be non-nil.
 202         CharsetReader func(charset string, input io.Reader) (io.Reader, error)
 203
 204         // DefaultSpace sets the default name space used for unadorned tags,
 205         // as if the entire XML stream were wrapped in an element containing
 206         // the attribute xmlns="DefaultSpace".
 207         DefaultSpace string
 208
 209         r              io.ByteReader
 210         t              TokenReader
 211         buf            bytes.Buffer
 212         saved          *bytes.Buffer
 213         stk            *stack
 214         free           *stack
 215         needClose      bool
 216         toClose        Name
 217         nextToken      Token
 218         nextByte       int
 219         ns             map[string]string
 220         err            error
 221         line           int
 222         offset         int64
 223         unmarshalDepth int
 224 }
 225
 226 // NewDecoder creates a new XML parser reading from r.
 227 // If r does not implement io.ByteReader, NewDecoder will
 228 // do its own buffering.
 229 func NewDecoder(r io.Reader) *Decoder {
 230         d := &Decoder{
 231                 ns:       make(map[string]string),
 232                 nextByte: -1,
 233                 line:     1,
 234                 Strict:   true,
 235         }
 236         d.switchToReader(r)
 237         return d
 238 }
 239
 240 // NewTokenDecoder creates a new XML parser using an underlying token stream.
 241 func NewTokenDecoder(t TokenReader) *Decoder {
 242         // Is it already a Decoder?
 243         if d, ok := t.(*Decoder); ok {
 244                 return d
 245         }
 246         d := &Decoder{
 247                 ns:       make(map[string]string),
 248                 t:        t,
 249                 nextByte: -1,
 250                 line:     1,
 251                 Strict:   true,
 252         }
 253         return d
 254 }
 255
 256 // Token returns the next XML token in the input stream.
 257 // At the end of the input stream, Token returns nil, io.EOF.
 258 //
 259 // Slices of bytes in the returned token data refer to the
 260 // parser's internal buffer and remain valid only until the next
 261 // call to Token. To acquire a copy of the bytes, call CopyToken
 262 // or the token's Copy method.
 263 //
 264 // Token expands self-closing elements such as <br/>
 265 // into separate start and end elements returned by successive calls.
 266 //
 267 // Token guarantees that the StartElement and EndElement
 268 // tokens it returns are properly nested and matched:
 269 // if Token encounters an unexpected end element
 270 // or EOF before all expected end elements,
 271 // it will return an error.
 272 //
 273 // Token implements XML name spaces as described by
 274 // https://www.w3.org/TR/REC-xml-names/.  Each of the
 275 // Name structures contained in the Token has the Space
 276 // set to the URL identifying its name space when known.
 277 // If Token encounters an unrecognized name space prefix,
 278 // it uses the prefix as the Space rather than report an error.
 279 func (d *Decoder) Token() (Token, error) {
 280         var t Token
 281         var err error
 282         if d.stk != nil && d.stk.kind == stkEOF {
 283                 return nil, io.EOF
 284         }
 285         if d.nextToken != nil {
 286                 t = d.nextToken
 287                 d.nextToken = nil
 288         } else if t, err = d.rawToken(); err != nil {
 289                 if err == io.EOF && d.stk != nil && d.stk.kind != stkEOF {
 290                         err = d.syntaxError("unexpected EOF")
 291                 }
 292                 return t, err
 293         }
 294
 295         if !d.Strict {
 296                 if t1, ok := d.autoClose(t); ok {
 297                         d.nextToken = t
 298                         t = t1
 299                 }
 300         }
 301         switch t1 := t.(type) {
 302         case StartElement:
 303                 // In XML name spaces, the translations listed in the
 304                 // attributes apply to the element name and
 305                 // to the other attribute names, so process
 306                 // the translations first.
 307                 for _, a := range t1.Attr {
 308                         if a.Name.Space == xmlnsPrefix {
 309                                 v, ok := d.ns[a.Name.Local]
 310                                 d.pushNs(a.Name.Local, v, ok)
 311                                 d.ns[a.Name.Local] = a.Value
 312                         }
 313                         if a.Name.Space == "" && a.Name.Local == xmlnsPrefix {
 314                                 // Default space for untagged names
 315                                 v, ok := d.ns[""]
 316                                 d.pushNs("", v, ok)
 317                                 d.ns[""] = a.Value
 318                         }
 319                 }
 320
 321                 d.translate(&t1.Name, true)
 322                 for i := range t1.Attr {
 323                         d.translate(&t1.Attr[i].Name, false)
 324                 }
 325                 d.pushElement(t1.Name)
 326                 t = t1
 327
 328         case EndElement:
 329                 d.translate(&t1.Name, true)
 330                 if !d.popElement(&t1) {
 331                         return nil, d.err
 332                 }
 333                 t = t1
 334         }
 335         return t, err
 336 }
 337
 338 const (
 339         xmlURL      = "http://www.w3.org/XML/1998/namespace"
 340         xmlnsPrefix = "xmlns"
 341         xmlPrefix   = "xml"
 342 )
 343
 344 // Apply name space translation to name n.
 345 // The default name space (for Space=="")
 346 // applies only to element names, not to attribute names.
 347 func (d *Decoder) translate(n *Name, isElementName bool) {
 348         switch {
 349         case n.Space == xmlnsPrefix:
 350                 return
 351         case n.Space == "" && !isElementName:
 352                 return
 353         case n.Space == xmlPrefix:
 354                 n.Space = xmlURL
 355         case n.Space == "" && n.Local == xmlnsPrefix:
 356                 return
 357         }
 358         if v, ok := d.ns[n.Space]; ok {
 359                 n.Space = v
 360         } else if n.Space == "" {
 361                 n.Space = d.DefaultSpace
 362         }
 363 }
 364
 365 func (d *Decoder) switchToReader(r io.Reader) {
 366         // Get efficient byte at a time reader.
 367         // Assume that if reader has its own
 368         // ReadByte, it's efficient enough.
 369         // Otherwise, use bufio.
 370         if rb, ok := r.(io.ByteReader); ok {
 371                 d.r = rb
 372         } else {
 373                 d.r = bufio.NewReader(r)
 374         }
 375 }
 376
 377 // Parsing state - stack holds old name space translations
 378 // and the current set of open elements. The translations to pop when
 379 // ending a given tag are *below* it on the stack, which is
 380 // more work but forced on us by XML.
 381 type stack struct {
 382         next *stack
 383         kind int
 384         name Name
 385         ok   bool
 386 }
 387
 388 const (
 389         stkStart = iota
 390         stkNs
 391         stkEOF
 392 )
 393
 394 func (d *Decoder) push(kind int) *stack {
 395         s := d.free
 396         if s != nil {
 397                 d.free = s.next
 398         } else {
 399                 s = new(stack)
 400         }
 401         s.next = d.stk
 402         s.kind = kind
 403         d.stk = s
 404         return s
 405 }
 406
 407 func (d *Decoder) pop() *stack {
 408         s := d.stk
 409         if s != nil {
 410                 d.stk = s.next
 411                 s.next = d.free
 412                 d.free = s
 413         }
 414         return s
 415 }
 416
 417 // Record that after the current element is finished
 418 // (that element is already pushed on the stack)
 419 // Token should return EOF until popEOF is called.
 420 func (d *Decoder) pushEOF() {
 421         // Walk down stack to find Start.
 422         // It might not be the top, because there might be stkNs
 423         // entries above it.
 424         start := d.stk
 425         for start.kind != stkStart {
 426                 start = start.next
 427         }
 428         // The stkNs entries below a start are associated with that
 429         // element too; skip over them.
 430         for start.next != nil && start.next.kind == stkNs {
 431                 start = start.next
 432         }
 433         s := d.free
 434         if s != nil {
 435                 d.free = s.next
 436         } else {
 437                 s = new(stack)
 438         }
 439         s.kind = stkEOF
 440         s.next = start.next
 441         start.next = s
 442 }
 443
 444 // Undo a pushEOF.
 445 // The element must have been finished, so the EOF should be at the top of the stack.
 446 func (d *Decoder) popEOF() bool {
 447         if d.stk == nil || d.stk.kind != stkEOF {
 448                 return false
 449         }
 450         d.pop()
 451         return true
 452 }
 453
 454 // Record that we are starting an element with the given name.
 455 func (d *Decoder) pushElement(name Name) {
 456         s := d.push(stkStart)
 457         s.name = name
 458 }
 459
 460 // Record that we are changing the value of ns[local].
 461 // The old value is url, ok.
 462 func (d *Decoder) pushNs(local string, url string, ok bool) {
 463         s := d.push(stkNs)
 464         s.name.Local = local
 465         s.name.Space = url
 466         s.ok = ok
 467 }
 468
 469 // Creates a SyntaxError with the current line number.
 470 func (d *Decoder) syntaxError(msg string) error {
 471         return &SyntaxError{Msg: msg, Line: d.line}
 472 }
 473
 474 // Record that we are ending an element with the given name.
 475 // The name must match the record at the top of the stack,
 476 // which must be a pushElement record.
 477 // After popping the element, apply any undo records from
 478 // the stack to restore the name translations that existed
 479 // before we saw this element.
 480 func (d *Decoder) popElement(t *EndElement) bool {
 481         s := d.pop()
 482         name := t.Name
 483         switch {
 484         case s == nil || s.kind != stkStart:
 485                 d.err = d.syntaxError("unexpected end element </" + name.Local + ">")
 486                 return false
 487         case s.name.Local != name.Local:
 488                 if !d.Strict {
 489                         d.needClose = true
 490                         d.toClose = t.Name
 491                         t.Name = s.name
 492                         return true
 493                 }
 494                 d.err = d.syntaxError("element <" + s.name.Local + "> closed by </" + name.Local + ">")
 495                 return false
 496         case s.name.Space != name.Space:
 497                 d.err = d.syntaxError("element <" + s.name.Local + "> in space " + s.name.Space +
 498                         "closed by </" + name.Local + "> in space " + name.Space)
 499                 return false
 500         }
 501
 502         // Pop stack until a Start or EOF is on the top, undoing the
 503         // translations that were associated with the element we just closed.
 504         for d.stk != nil && d.stk.kind != stkStart && d.stk.kind != stkEOF {
 505                 s := d.pop()
 506                 if s.ok {
 507                         d.ns[s.name.Local] = s.name.Space
 508                 } else {
 509                         delete(d.ns, s.name.Local)
 510                 }
 511         }
 512
 513         return true
 514 }
 515
 516 // If the top element on the stack is autoclosing and
 517 // t is not the end tag, invent the end tag.
 518 func (d *Decoder) autoClose(t Token) (Token, bool) {
 519         if d.stk == nil || d.stk.kind != stkStart {
 520                 return nil, false
 521         }
 522         name := strings.ToLower(d.stk.name.Local)
 523         for _, s := range d.AutoClose {
 524                 if strings.ToLower(s) == name {
 525                         // This one should be auto closed if t doesn't close it.
 526                         et, ok := t.(EndElement)
 527                         if !ok || et.Name.Local != name {
 528                                 return EndElement{d.stk.name}, true
 529                         }
 530                         break
 531                 }
 532         }
 533         return nil, false
 534 }
 535
 536 var errRawToken = errors.New("xml: cannot use RawToken from UnmarshalXML method")
 537
 538 // RawToken is like Token but does not verify that
 539 // start and end elements match and does not translate
 540 // name space prefixes to their corresponding URLs.
 541 func (d *Decoder) RawToken() (Token, error) {
 542         if d.unmarshalDepth > 0 {
 543                 return nil, errRawToken
 544         }
 545         return d.rawToken()
 546 }
 547
 548 func (d *Decoder) rawToken() (Token, error) {
 549         if d.t != nil {
 550                 return d.t.Token()
 551         }
 552         if d.err != nil {
 553                 return nil, d.err
 554         }
 555         if d.needClose {
 556                 // The last element we read was self-closing and
 557                 // we returned just the StartElement half.
 558                 // Return the EndElement half now.
 559                 d.needClose = false
 560                 return EndElement{d.toClose}, nil
 561         }
 562
 563         b, ok := d.getc()
 564         if !ok {
 565                 return nil, d.err
 566         }
 567
 568         if b != '<' {
 569                 // Text section.
 570                 d.ungetc(b)
 571                 data := d.text(-1, false)
 572                 if data == nil {
 573                         return nil, d.err
 574                 }
 575                 return CharData(data), nil
 576         }
 577
 578         if b, ok = d.mustgetc(); !ok {
 579                 return nil, d.err
 580         }
 581         switch b {
 582         case '/':
 583                 // </: End element
 584                 var name Name
 585                 if name, ok = d.nsname(); !ok {
 586                         if d.err == nil {
 587                                 d.err = d.syntaxError("expected element name after </")
 588                         }
 589                         return nil, d.err
 590                 }
 591                 d.space()
 592                 if b, ok = d.mustgetc(); !ok {
 593                         return nil, d.err
 594                 }
 595                 if b != '>' {
 596                         d.err = d.syntaxError("invalid characters between </" + name.Local + " and >")
 597                         return nil, d.err
 598                 }
 599                 return EndElement{name}, nil
 600
 601         case '?':
 602                 // <?: Processing instruction.
 603                 var target string
 604                 if target, ok = d.name(); !ok {
 605                         if d.err == nil {
 606                                 d.err = d.syntaxError("expected target name after <?")
 607                         }
 608                         return nil, d.err
 609                 }
 610                 d.space()
 611                 d.buf.Reset()
 612                 var b0 byte
 613                 for {
 614                         if b, ok = d.mustgetc(); !ok {
 615                                 return nil, d.err
 616                         }
 617                         d.buf.WriteByte(b)
 618                         if b0 == '?' && b == '>' {
 619                                 break
 620                         }
 621                         b0 = b
 622                 }
 623                 data := d.buf.Bytes()
 624                 data = data[0 : len(data)-2] // chop ?>
 625
 626                 if target == "xml" {
 627                         content := string(data)
 628                         ver := procInst("version", content)
 629                         if ver != "" && ver != "1.0" {
 630                                 d.err = fmt.Errorf("xml: unsupported version %q; only version 1.0 is supported", ver)
 631                                 return nil, d.err
 632                         }
 633                         enc := procInst("encoding", content)
 634                         if enc != "" && enc != "utf-8" && enc != "UTF-8" && !strings.EqualFold(enc, "utf-8") {
 635                                 if d.CharsetReader == nil {
 636                                         d.err = fmt.Errorf("xml: encoding %q declared but Decoder.CharsetReader is nil", enc)
 637                                         return nil, d.err
 638                                 }
 639                                 newr, err := d.CharsetReader(enc, d.r.(io.Reader))
 640                                 if err != nil {
 641                                         d.err = fmt.Errorf("xml: opening charset %q: %v", enc, err)
 642                                         return nil, d.err
 643                                 }
 644                                 if newr == nil {
 645                                         panic("CharsetReader returned a nil Reader for charset " + enc)
 646                                 }
 647                                 d.switchToReader(newr)
 648                         }
 649                 }
 650                 return ProcInst{target, data}, nil
 651
 652         case '!':
 653                 // <!: Maybe comment, maybe CDATA.
 654                 if b, ok = d.mustgetc(); !ok {
 655                         return nil, d.err
 656                 }
 657                 switch b {
 658                 case '-': // <!-
 659                         // Probably <!-- for a comment.
 660                         if b, ok = d.mustgetc(); !ok {
 661                                 return nil, d.err
 662                         }
 663                         if b != '-' {
 664                                 d.err = d.syntaxError("invalid sequence <!- not part of <!--")
 665                                 return nil, d.err
 666                         }
 667                         // Look for terminator.
 668                         d.buf.Reset()
 669                         var b0, b1 byte
 670                         for {
 671                                 if b, ok = d.mustgetc(); !ok {
 672                                         return nil, d.err
 673                                 }
 674                                 d.buf.WriteByte(b)
 675                                 if b0 == '-' && b1 == '-' {
 676                                         if b != '>' {
 677                                                 d.err = d.syntaxError(
 678                                                         `invalid sequence "--" not allowed in comments`)
 679                                                 return nil, d.err
 680                                         }
 681                                         break
 682                                 }
 683                                 b0, b1 = b1, b
 684                         }
 685                         data := d.buf.Bytes()
 686                         data = data[0 : len(data)-3] // chop -->
 687                         return Comment(data), nil
 688
 689                 case '[': // <![
 690                         // Probably <![CDATA[.
 691                         for i := 0; i < 6; i++ {
 692                                 if b, ok = d.mustgetc(); !ok {
 693                                         return nil, d.err
 694                                 }
 695                                 if b != "CDATA["[i] {
 696                                         d.err = d.syntaxError("invalid <![ sequence")
 697                                         return nil, d.err
 698                                 }
 699                         }
 700                         // Have <![CDATA[.  Read text until ]]>.
 701                         data := d.text(-1, true)
 702                         if data == nil {
 703                                 return nil, d.err
 704                         }
 705                         return CharData(data), nil
 706                 }
 707
 708                 // Probably a directive: <!DOCTYPE ...>, <!ENTITY ...>, etc.
 709                 // We don't care, but accumulate for caller. Quoted angle
 710                 // brackets do not count for nesting.
 711                 d.buf.Reset()
 712                 d.buf.WriteByte(b)
 713                 inquote := uint8(0)
 714                 depth := 0
 715                 for {
 716                         if b, ok = d.mustgetc(); !ok {
 717                                 return nil, d.err
 718                         }
 719                         if inquote == 0 && b == '>' && depth == 0 {
 720                                 break
 721                         }
 722                 HandleB:
 723                         d.buf.WriteByte(b)
 724                         switch {
 725                         case b == inquote:
 726                                 inquote = 0
 727
 728                         case inquote != 0:
 729                                 // in quotes, no special action
 730
 731                         case b == '\'' || b == '"':
 732                                 inquote = b
 733
 734                         case b == '>' && inquote == 0:
 735                                 depth--
 736
 737                         case b == '<' && inquote == 0:
 738                                 // Look for <!-- to begin comment.
 739                                 s := "!--"
 740                                 for i := 0; i < len(s); i++ {
 741                                         if b, ok = d.mustgetc(); !ok {
 742                                                 return nil, d.err
 743                                         }
 744                                         if b != s[i] {
 745                                                 for j := 0; j < i; j++ {
 746                                                         d.buf.WriteByte(s[j])
 747                                                 }
 748                                                 depth++
 749                                                 goto HandleB
 750                                         }
 751                                 }
 752
 753                                 // Remove < that was written above.
 754                                 d.buf.Truncate(d.buf.Len() - 1)
 755
 756                                 // Look for terminator.
 757                                 var b0, b1 byte
 758                                 for {
 759                                         if b, ok = d.mustgetc(); !ok {
 760                                                 return nil, d.err
 761                                         }
 762                                         if b0 == '-' && b1 == '-' && b == '>' {
 763                                                 break
 764                                         }
 765                                         b0, b1 = b1, b
 766                                 }
 767                         }
 768                 }
 769                 return Directive(d.buf.Bytes()), nil
 770         }
 771
 772         // Must be an open element like <a href="foo">
 773         d.ungetc(b)
 774
 775         var (
 776                 name  Name
 777                 empty bool
 778                 attr  []Attr
 779         )
 780         if name, ok = d.nsname(); !ok {
 781                 if d.err == nil {
 782                         d.err = d.syntaxError("expected element name after <")
 783                 }
 784                 return nil, d.err
 785         }
 786
 787         attr = []Attr{}
 788         for {
 789                 d.space()
 790                 if b, ok = d.mustgetc(); !ok {
 791                         return nil, d.err
 792                 }
 793                 if b == '/' {
 794                         empty = true
 795                         if b, ok = d.mustgetc(); !ok {
 796                                 return nil, d.err
 797                         }
 798                         if b != '>' {
 799                                 d.err = d.syntaxError("expected /> in element")
 800                                 return nil, d.err
 801                         }
 802                         break
 803                 }
 804                 if b == '>' {
 805                         break
 806                 }
 807                 d.ungetc(b)
 808
 809                 a := Attr{}
 810                 if a.Name, ok = d.nsname(); !ok {
 811                         if d.err == nil {
 812                                 d.err = d.syntaxError("expected attribute name in element")
 813                         }
 814                         return nil, d.err
 815                 }
 816                 d.space()
 817                 if b, ok = d.mustgetc(); !ok {
 818                         return nil, d.err
 819                 }
 820                 if b != '=' {
 821                         if d.Strict {
 822                                 d.err = d.syntaxError("attribute name without = in element")
 823                                 return nil, d.err
 824                         }
 825                         d.ungetc(b)
 826                         a.Value = a.Name.Local
 827                 } else {
 828                         d.space()
 829                         data := d.attrval()
 830                         if data == nil {
 831                                 return nil, d.err
 832                         }
 833                         a.Value = string(data)
 834                 }
 835                 attr = append(attr, a)
 836         }
 837         if empty {
 838                 d.needClose = true
 839                 d.toClose = name
 840         }
 841         return StartElement{name, attr}, nil
 842 }
 843
 844 func (d *Decoder) attrval() []byte {
 845         b, ok := d.mustgetc()
 846         if !ok {
 847                 return nil
 848         }
 849         // Handle quoted attribute values
 850         if b == '"' || b == '\'' {
 851                 return d.text(int(b), false)
 852         }
 853         // Handle unquoted attribute values for strict parsers
 854         if d.Strict {
 855                 d.err = d.syntaxError("unquoted or missing attribute value in element")
 856                 return nil
 857         }
 858         // Handle unquoted attribute values for unstrict parsers
 859         d.ungetc(b)
 860         d.buf.Reset()
 861         for {
 862                 b, ok = d.mustgetc()
 863                 if !ok {
 864                         return nil
 865                 }
 866                 // https://www.w3.org/TR/REC-html40/intro/sgmltut.html#h-3.2.2
 867                 if 'a' <= b && b <= 'z' || 'A' <= b && b <= 'Z' ||
 868                         '0' <= b && b <= '9' || b == '_' || b == ':' || b == '-' {
 869                         d.buf.WriteByte(b)
 870                 } else {
 871                         d.ungetc(b)
 872                         break
 873                 }
 874         }
 875         return d.buf.Bytes()
 876 }
 877
 878 // Skip spaces if any
 879 func (d *Decoder) space() {
 880         for {
 881                 b, ok := d.getc()
 882                 if !ok {
 883                         return
 884                 }
 885                 switch b {
 886                 case ' ', '\r', '\n', '\t':
 887                 default:
 888                         d.ungetc(b)
 889                         return
 890                 }
 891         }
 892 }
 893
 894 // Read a single byte.
 895 // If there is no byte to read, return ok==false
 896 // and leave the error in d.err.
 897 // Maintain line number.
 898 func (d *Decoder) getc() (b byte, ok bool) {
 899         if d.err != nil {
 900                 return 0, false
 901         }
 902         if d.nextByte >= 0 {
 903                 b = byte(d.nextByte)
 904                 d.nextByte = -1
 905         } else {
 906                 b, d.err = d.r.ReadByte()
 907                 if d.err != nil {
 908                         return 0, false
 909                 }
 910                 if d.saved != nil {
 911                         d.saved.WriteByte(b)
 912                 }
 913         }
 914         if b == '\n' {
 915                 d.line++
 916         }
 917         d.offset++
 918         return b, true
 919 }
 920
// InputOffset returns the input stream byte offset of the current decoder position.
// The offset gives the location of the end of the most recently returned token
// and the beginning of the next token.
//
// The value is the decoder's running byte count: getc advances it for each
// byte it delivers and ungetc moves it back by one for a pushed-back byte.
func (d *Decoder) InputOffset() int64 {
	return d.offset
}
 927
 928 // Return saved offset.
 929 // If we did ungetc (nextByte >= 0), have to back up one.
 930 func (d *Decoder) savedOffset() int {
 931         n := d.saved.Len()
 932         if d.nextByte >= 0 {
 933                 n--
 934         }
 935         return n
 936 }
 937
 938 // Must read a single byte.
 939 // If there is no byte to read,
 940 // set d.err to SyntaxError("unexpected EOF")
 941 // and return ok==false
 942 func (d *Decoder) mustgetc() (b byte, ok bool) {
 943         if b, ok = d.getc(); !ok {
 944                 if d.err == io.EOF {
 945                         d.err = d.syntaxError("unexpected EOF")
 946                 }
 947         }
 948         return
 949 }
 950
 951 // Unread a single byte.
 952 func (d *Decoder) ungetc(b byte) {
 953         if b == '\n' {
 954                 d.line--
 955         }
 956         d.nextByte = int(b)
 957         d.offset--
 958 }
 959
 960 var entity = map[string]int{
 961         "lt":   '<',
 962         "gt":   '>',
 963         "amp":  '&',
 964         "apos": '\'',
 965         "quot": '"',
 966 }
 967
 968 // Read plain text section (XML calls it character data).
 969 // If quote >= 0, we are in a quoted string and need to find the matching quote.
 970 // If cdata == true, we are in a <![CDATA[ section and need to find ]]>.
 971 // On failure return nil and leave the error in d.err.
 972 func (d *Decoder) text(quote int, cdata bool) []byte {
 973         var b0, b1 byte
 974         var trunc int
 975         d.buf.Reset()
 976 Input:
 977         for {
 978                 b, ok := d.getc()
 979                 if !ok {
 980                         if cdata {
 981                                 if d.err == io.EOF {
 982                                         d.err = d.syntaxError("unexpected EOF in CDATA section")
 983                                 }
 984                                 return nil
 985                         }
 986                         break Input
 987                 }
 988
 989                 // <![CDATA[ section ends with ]]>.
 990                 // It is an error for ]]> to appear in ordinary text.
 991                 if b0 == ']' && b1 == ']' && b == '>' {
 992                         if cdata {
 993                                 trunc = 2
 994                                 break Input
 995                         }
 996                         d.err = d.syntaxError("unescaped ]]> not in CDATA section")
 997                         return nil
 998                 }
 999
1000                 // Stop reading text if we see a <.
1001                 if b == '<' && !cdata {
1002                         if quote >= 0 {
1003                                 d.err = d.syntaxError("unescaped < inside quoted string")
1004                                 return nil
1005                         }
1006                         d.ungetc('<')
1007                         break Input
1008                 }
1009                 if quote >= 0 && b == byte(quote) {
1010                         break Input
1011                 }
1012                 if b == '&' && !cdata {
1013                         // Read escaped character expression up to semicolon.
1014                         // XML in all its glory allows a document to define and use
1015                         // its own character names with <!ENTITY ...> directives.
1016                         // Parsers are required to recognize lt, gt, amp, apos, and quot
1017                         // even if they have not been declared.
1018                         before := d.buf.Len()
1019                         d.buf.WriteByte('&')
1020                         var ok bool
1021                         var text string
1022                         var haveText bool
1023                         if b, ok = d.mustgetc(); !ok {
1024                                 return nil
1025                         }
1026                         if b == '#' {
1027                                 d.buf.WriteByte(b)
1028                                 if b, ok = d.mustgetc(); !ok {
1029                                         return nil
1030                                 }
1031                                 base := 10
1032                                 if b == 'x' {
1033                                         base = 16
1034                                         d.buf.WriteByte(b)
1035                                         if b, ok = d.mustgetc(); !ok {
1036                                                 return nil
1037                                         }
1038                                 }
1039                                 start := d.buf.Len()
1040                                 for '0' <= b && b <= '9' ||
1041                                         base == 16 && 'a' <= b && b <= 'f' ||
1042                                         base == 16 && 'A' <= b && b <= 'F' {
1043                                         d.buf.WriteByte(b)
1044                                         if b, ok = d.mustgetc(); !ok {
1045                                                 return nil
1046                                         }
1047                                 }
1048                                 if b != ';' {
1049                                         d.ungetc(b)
1050                                 } else {
1051                                         s := string(d.buf.Bytes()[start:])
1052                                         d.buf.WriteByte(';')
1053                                         n, err := strconv.ParseUint(s, base, 64)
1054                                         if err == nil && n <= unicode.MaxRune {
1055                                                 text = string(n)
1056                                                 haveText = true
1057                                         }
1058                                 }
1059                         } else {
1060                                 d.ungetc(b)
1061                                 if !d.readName() {
1062                                         if d.err != nil {
1063                                                 return nil
1064                                         }
1065                                 }
1066                                 if b, ok = d.mustgetc(); !ok {
1067                                         return nil
1068                                 }
1069                                 if b != ';' {
1070                                         d.ungetc(b)
1071                                 } else {
1072                                         name := d.buf.Bytes()[before+1:]
1073                                         d.buf.WriteByte(';')
1074                                         if isName(name) {
1075                                                 s := string(name)
1076                                                 if r, ok := entity[s]; ok {
1077                                                         text = string(r)
1078                                                         haveText = true
1079                                                 } else if d.Entity != nil {
1080                                                         text, haveText = d.Entity[s]
1081                                                 }
1082                                         }
1083                                 }
1084                         }
1085
1086                         if haveText {
1087                                 d.buf.Truncate(before)
1088                                 d.buf.Write([]byte(text))
1089                                 b0, b1 = 0, 0
1090                                 continue Input
1091                         }
1092                         if !d.Strict {
1093                                 b0, b1 = 0, 0
1094                                 continue Input
1095                         }
1096                         ent := string(d.buf.Bytes()[before:])
1097                         if ent[len(ent)-1] != ';' {
1098                                 ent += " (no semicolon)"
1099                         }
1100                         d.err = d.syntaxError("invalid character entity " + ent)
1101                         return nil
1102                 }
1103
1104                 // We must rewrite unescaped \r and \r\n into \n.
1105                 if b == '\r' {
1106                         d.buf.WriteByte('\n')
1107                 } else if b1 == '\r' && b == '\n' {
1108                         // Skip \r\n--we already wrote \n.
1109                 } else {
1110                         d.buf.WriteByte(b)
1111                 }
1112
1113                 b0, b1 = b1, b
1114         }
1115         data := d.buf.Bytes()
1116         data = data[0 : len(data)-trunc]
1117
1118         // Inspect each rune for being a disallowed character.
1119         buf := data
1120         for len(buf) > 0 {
1121                 r, size := utf8.DecodeRune(buf)
1122                 if r == utf8.RuneError && size == 1 {
1123                         d.err = d.syntaxError("invalid UTF-8")
1124                         return nil
1125                 }
1126                 buf = buf[size:]
1127                 if !isInCharacterRange(r) {
1128                         d.err = d.syntaxError(fmt.Sprintf("illegal character code %U", r))
1129                         return nil
1130                 }
1131         }
1132
1133         return data
1134 }
1135
// isInCharacterRange reports whether r is in the XML Character Range, per
// the Char production of https://www.xml.com/axml/testaxml.htm,
// Section 2.2 Characters: tab, line feed, carriage return, and the ranges
// U+0020-U+D7FF, U+E000-U+FFFD and U+10000-U+10FFFF.
func isInCharacterRange(r rune) (inrange bool) {
	switch {
	case r == 0x09, r == 0x0A, r == 0x0D:
		return true
	case 0x20 <= r && r <= 0xD7FF:
		return true
	case 0xE000 <= r && r <= 0xFFFD:
		return true
	case 0x10000 <= r && r <= 0x10FFFF:
		return true
	}
	return false
}
1147
1148 // Get name space name: name with a : stuck in the middle.
1149 // The part before the : is the name space identifier.
1150 func (d *Decoder) nsname() (name Name, ok bool) {
1151         s, ok := d.name()
1152         if !ok {
1153                 return
1154         }
1155         i := strings.Index(s, ":")
1156         if i < 0 {
1157                 name.Local = s
1158         } else {
1159                 name.Space = s[0:i]
1160                 name.Local = s[i+1:]
1161         }
1162         return name, true
1163 }
1164
1165 // Get name: /first(first|second)*/
1166 // Do not set d.err if the name is missing (unless unexpected EOF is received):
1167 // let the caller provide better context.
1168 func (d *Decoder) name() (s string, ok bool) {
1169         d.buf.Reset()
1170         if !d.readName() {
1171                 return "", false
1172         }
1173
1174         // Now we check the characters.
1175         b := d.buf.Bytes()
1176         if !isName(b) {
1177                 d.err = d.syntaxError("invalid XML name: " + string(b))
1178                 return "", false
1179         }
1180         return string(b), true
1181 }
1182
1183 // Read a name and append its bytes to d.buf.
1184 // The name is delimited by any single-byte character not valid in names.
1185 // All multi-byte characters are accepted; the caller must check their validity.
1186 func (d *Decoder) readName() (ok bool) {
1187         var b byte
1188         if b, ok = d.mustgetc(); !ok {
1189                 return
1190         }
1191         if b < utf8.RuneSelf && !isNameByte(b) {
1192                 d.ungetc(b)
1193                 return false
1194         }
1195         d.buf.WriteByte(b)
1196
1197         for {
1198                 if b, ok = d.mustgetc(); !ok {
1199                         return
1200                 }
1201                 if b < utf8.RuneSelf && !isNameByte(b) {
1202                         d.ungetc(b)
1203                         break
1204                 }
1205                 d.buf.WriteByte(b)
1206         }
1207         return true
1208 }
1209
// isNameByte reports whether c is an ASCII byte that may appear in a
// name: a letter, a digit, or one of '_', ':', '.', '-'.
func isNameByte(c byte) bool {
	switch {
	case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z', '0' <= c && c <= '9':
		return true
	}
	switch c {
	case '_', ':', '.', '-':
		return true
	}
	return false
}
1216
1217 func isName(s []byte) bool {
1218         if len(s) == 0 {
1219                 return false
1220         }
1221         c, n := utf8.DecodeRune(s)
1222         if c == utf8.RuneError && n == 1 {
1223                 return false
1224         }
1225         if !unicode.Is(first, c) {
1226                 return false
1227         }
1228         for n < len(s) {
1229                 s = s[n:]
1230                 c, n = utf8.DecodeRune(s)
1231                 if c == utf8.RuneError && n == 1 {
1232                         return false
1233                 }
1234                 if !unicode.Is(first, c) && !unicode.Is(second, c) {
1235                         return false
1236                 }
1237         }
1238         return true
1239 }
1240
1241 func isNameString(s string) bool {
1242         if len(s) == 0 {
1243                 return false
1244         }
1245         c, n := utf8.DecodeRuneInString(s)
1246         if c == utf8.RuneError && n == 1 {
1247                 return false
1248         }
1249         if !unicode.Is(first, c) {
1250                 return false
1251         }
1252         for n < len(s) {
1253                 s = s[n:]
1254                 c, n = utf8.DecodeRuneInString(s)
1255                 if c == utf8.RuneError && n == 1 {
1256                         return false
1257                 }
1258                 if !unicode.Is(first, c) && !unicode.Is(second, c) {
1259                         return false
1260                 }
1261         }
1262         return true
1263 }
1264
// These tables were generated by cut and paste from Appendix B of
// the XML spec at https://www.xml.com/axml/testaxml.htm
// and then reformatting. The table first corresponds to
// (Letter | '_' | ':'), the characters that may start a name; the table
// second holds the remaining NameChar characters, which may only follow.
1269
// first lists the code points that isName and isNameString accept as the
// first character of a name; they are accepted in later positions as well.
//
// Each entry is a unicode.Range16{Lo, Hi, Stride}. An entry whose stride is
// not 1 covers only Lo, Lo+Stride, ..., Hi, so a stride equal to Hi-Lo
// selects exactly the two end points.
var first = &unicode.RangeTable{
	R16: []unicode.Range16{
		{0x003A, 0x003A, 1}, // ':'
		{0x0041, 0x005A, 1}, // 'A'-'Z'
		{0x005F, 0x005F, 1}, // '_'
		{0x0061, 0x007A, 1}, // 'a'-'z'
		{0x00C0, 0x00D6, 1},
		{0x00D8, 0x00F6, 1},
		{0x00F8, 0x00FF, 1},
		{0x0100, 0x0131, 1},
		{0x0134, 0x013E, 1},
		{0x0141, 0x0148, 1},
		{0x014A, 0x017E, 1},
		{0x0180, 0x01C3, 1},
		{0x01CD, 0x01F0, 1},
		{0x01F4, 0x01F5, 1},
		{0x01FA, 0x0217, 1},
		{0x0250, 0x02A8, 1},
		{0x02BB, 0x02C1, 1},
		{0x0386, 0x0386, 1},
		{0x0388, 0x038A, 1},
		{0x038C, 0x038C, 1},
		{0x038E, 0x03A1, 1},
		{0x03A3, 0x03CE, 1},
		{0x03D0, 0x03D6, 1},
		{0x03DA, 0x03E0, 2},
		{0x03E2, 0x03F3, 1},
		{0x0401, 0x040C, 1},
		{0x040E, 0x044F, 1},
		{0x0451, 0x045C, 1},
		{0x045E, 0x0481, 1},
		{0x0490, 0x04C4, 1},
		{0x04C7, 0x04C8, 1},
		{0x04CB, 0x04CC, 1},
		{0x04D0, 0x04EB, 1},
		{0x04EE, 0x04F5, 1},
		{0x04F8, 0x04F9, 1},
		{0x0531, 0x0556, 1},
		{0x0559, 0x0559, 1},
		{0x0561, 0x0586, 1},
		{0x05D0, 0x05EA, 1},
		{0x05F0, 0x05F2, 1},
		{0x0621, 0x063A, 1},
		{0x0641, 0x064A, 1},
		{0x0671, 0x06B7, 1},
		{0x06BA, 0x06BE, 1},
		{0x06C0, 0x06CE, 1},
		{0x06D0, 0x06D3, 1},
		{0x06D5, 0x06D5, 1},
		{0x06E5, 0x06E6, 1},
		{0x0905, 0x0939, 1},
		{0x093D, 0x093D, 1},
		{0x0958, 0x0961, 1},
		{0x0985, 0x098C, 1},
		{0x098F, 0x0990, 1},
		{0x0993, 0x09A8, 1},
		{0x09AA, 0x09B0, 1},
		{0x09B2, 0x09B2, 1},
		{0x09B6, 0x09B9, 1},
		{0x09DC, 0x09DD, 1},
		{0x09DF, 0x09E1, 1},
		{0x09F0, 0x09F1, 1},
		{0x0A05, 0x0A0A, 1},
		{0x0A0F, 0x0A10, 1},
		{0x0A13, 0x0A28, 1},
		{0x0A2A, 0x0A30, 1},
		{0x0A32, 0x0A33, 1},
		{0x0A35, 0x0A36, 1},
		{0x0A38, 0x0A39, 1},
		{0x0A59, 0x0A5C, 1},
		{0x0A5E, 0x0A5E, 1},
		{0x0A72, 0x0A74, 1},
		{0x0A85, 0x0A8B, 1},
		{0x0A8D, 0x0A8D, 1},
		{0x0A8F, 0x0A91, 1},
		{0x0A93, 0x0AA8, 1},
		{0x0AAA, 0x0AB0, 1},
		{0x0AB2, 0x0AB3, 1},
		{0x0AB5, 0x0AB9, 1},
		{0x0ABD, 0x0AE0, 0x23},
		{0x0B05, 0x0B0C, 1},
		{0x0B0F, 0x0B10, 1},
		{0x0B13, 0x0B28, 1},
		{0x0B2A, 0x0B30, 1},
		{0x0B32, 0x0B33, 1},
		{0x0B36, 0x0B39, 1},
		{0x0B3D, 0x0B3D, 1},
		{0x0B5C, 0x0B5D, 1},
		{0x0B5F, 0x0B61, 1},
		{0x0B85, 0x0B8A, 1},
		{0x0B8E, 0x0B90, 1},
		{0x0B92, 0x0B95, 1},
		{0x0B99, 0x0B9A, 1},
		{0x0B9C, 0x0B9C, 1},
		{0x0B9E, 0x0B9F, 1},
		{0x0BA3, 0x0BA4, 1},
		{0x0BA8, 0x0BAA, 1},
		{0x0BAE, 0x0BB5, 1},
		{0x0BB7, 0x0BB9, 1},
		{0x0C05, 0x0C0C, 1},
		{0x0C0E, 0x0C10, 1},
		{0x0C12, 0x0C28, 1},
		{0x0C2A, 0x0C33, 1},
		{0x0C35, 0x0C39, 1},
		{0x0C60, 0x0C61, 1},
		{0x0C85, 0x0C8C, 1},
		{0x0C8E, 0x0C90, 1},
		{0x0C92, 0x0CA8, 1},
		{0x0CAA, 0x0CB3, 1},
		{0x0CB5, 0x0CB9, 1},
		{0x0CDE, 0x0CDE, 1},
		{0x0CE0, 0x0CE1, 1},
		{0x0D05, 0x0D0C, 1},
		{0x0D0E, 0x0D10, 1},
		{0x0D12, 0x0D28, 1},
		{0x0D2A, 0x0D39, 1},
		{0x0D60, 0x0D61, 1},
		{0x0E01, 0x0E2E, 1},
		{0x0E30, 0x0E30, 1},
		{0x0E32, 0x0E33, 1},
		{0x0E40, 0x0E45, 1},
		{0x0E81, 0x0E82, 1},
		{0x0E84, 0x0E84, 1},
		{0x0E87, 0x0E88, 1},
		{0x0E8A, 0x0E8D, 3},
		{0x0E94, 0x0E97, 1},
		{0x0E99, 0x0E9F, 1},
		{0x0EA1, 0x0EA3, 1},
		{0x0EA5, 0x0EA7, 2},
		{0x0EAA, 0x0EAB, 1},
		{0x0EAD, 0x0EAE, 1},
		{0x0EB0, 0x0EB0, 1},
		{0x0EB2, 0x0EB3, 1},
		{0x0EBD, 0x0EBD, 1},
		{0x0EC0, 0x0EC4, 1},
		{0x0F40, 0x0F47, 1},
		{0x0F49, 0x0F69, 1},
		{0x10A0, 0x10C5, 1},
		{0x10D0, 0x10F6, 1},
		{0x1100, 0x1100, 1},
		{0x1102, 0x1103, 1},
		{0x1105, 0x1107, 1},
		{0x1109, 0x1109, 1},
		{0x110B, 0x110C, 1},
		{0x110E, 0x1112, 1},
		{0x113C, 0x1140, 2},
		{0x114C, 0x1150, 2},
		{0x1154, 0x1155, 1},
		{0x1159, 0x1159, 1},
		{0x115F, 0x1161, 1},
		{0x1163, 0x1169, 2},
		{0x116D, 0x116E, 1},
		{0x1172, 0x1173, 1},
		{0x1175, 0x119E, 0x119E - 0x1175},
		{0x11A8, 0x11AB, 0x11AB - 0x11A8},
		{0x11AE, 0x11AF, 1},
		{0x11B7, 0x11B8, 1},
		{0x11BA, 0x11BA, 1},
		{0x11BC, 0x11C2, 1},
		{0x11EB, 0x11F0, 0x11F0 - 0x11EB},
		{0x11F9, 0x11F9, 1},
		{0x1E00, 0x1E9B, 1},
		{0x1EA0, 0x1EF9, 1},
		{0x1F00, 0x1F15, 1},
		{0x1F18, 0x1F1D, 1},
		{0x1F20, 0x1F45, 1},
		{0x1F48, 0x1F4D, 1},
		{0x1F50, 0x1F57, 1},
		{0x1F59, 0x1F5B, 0x1F5B - 0x1F59},
		{0x1F5D, 0x1F5D, 1},
		{0x1F5F, 0x1F7D, 1},
		{0x1F80, 0x1FB4, 1},
		{0x1FB6, 0x1FBC, 1},
		{0x1FBE, 0x1FBE, 1},
		{0x1FC2, 0x1FC4, 1},
		{0x1FC6, 0x1FCC, 1},
		{0x1FD0, 0x1FD3, 1},
		{0x1FD6, 0x1FDB, 1},
		{0x1FE0, 0x1FEC, 1},
		{0x1FF2, 0x1FF4, 1},
		{0x1FF6, 0x1FFC, 1},
		{0x2126, 0x2126, 1},
		{0x212A, 0x212B, 1},
		{0x212E, 0x212E, 1},
		{0x2180, 0x2182, 1},
		{0x3007, 0x3007, 1},
		{0x3021, 0x3029, 1},
		{0x3041, 0x3094, 1},
		{0x30A1, 0x30FA, 1},
		{0x3105, 0x312C, 1},
		{0x4E00, 0x9FA5, 1},
		{0xAC00, 0xD7A3, 1},
	},
}
1464
// second lists the code points that isName and isNameString accept, in
// addition to those in first, for every character after the first one.
//
// Each entry is a unicode.Range16{Lo, Hi, Stride}. An entry whose stride is
// not 1 covers only Lo, Lo+Stride, ..., Hi, so a stride equal to Hi-Lo
// selects exactly the two end points.
var second = &unicode.RangeTable{
	R16: []unicode.Range16{
		{0x002D, 0x002E, 1}, // '-' and '.'
		{0x0030, 0x0039, 1}, // '0'-'9'
		{0x00B7, 0x00B7, 1},
		{0x02D0, 0x02D1, 1},
		{0x0300, 0x0345, 1},
		{0x0360, 0x0361, 1},
		{0x0387, 0x0387, 1},
		{0x0483, 0x0486, 1},
		{0x0591, 0x05A1, 1},
		{0x05A3, 0x05B9, 1},
		{0x05BB, 0x05BD, 1},
		{0x05BF, 0x05BF, 1},
		{0x05C1, 0x05C2, 1},
		{0x05C4, 0x0640, 0x0640 - 0x05C4},
		{0x064B, 0x0652, 1},
		{0x0660, 0x0669, 1},
		{0x0670, 0x0670, 1},
		{0x06D6, 0x06DC, 1},
		{0x06DD, 0x06DF, 1},
		{0x06E0, 0x06E4, 1},
		{0x06E7, 0x06E8, 1},
		{0x06EA, 0x06ED, 1},
		{0x06F0, 0x06F9, 1},
		{0x0901, 0x0903, 1},
		{0x093C, 0x093C, 1},
		{0x093E, 0x094C, 1},
		{0x094D, 0x094D, 1},
		{0x0951, 0x0954, 1},
		{0x0962, 0x0963, 1},
		{0x0966, 0x096F, 1},
		{0x0981, 0x0983, 1},
		{0x09BC, 0x09BC, 1},
		{0x09BE, 0x09BF, 1},
		{0x09C0, 0x09C4, 1},
		{0x09C7, 0x09C8, 1},
		{0x09CB, 0x09CD, 1},
		{0x09D7, 0x09D7, 1},
		{0x09E2, 0x09E3, 1},
		{0x09E6, 0x09EF, 1},
		{0x0A02, 0x0A3C, 0x3A},
		{0x0A3E, 0x0A3F, 1},
		{0x0A40, 0x0A42, 1},
		{0x0A47, 0x0A48, 1},
		{0x0A4B, 0x0A4D, 1},
		{0x0A66, 0x0A6F, 1},
		{0x0A70, 0x0A71, 1},
		{0x0A81, 0x0A83, 1},
		{0x0ABC, 0x0ABC, 1},
		{0x0ABE, 0x0AC5, 1},
		{0x0AC7, 0x0AC9, 1},
		{0x0ACB, 0x0ACD, 1},
		{0x0AE6, 0x0AEF, 1},
		{0x0B01, 0x0B03, 1},
		{0x0B3C, 0x0B3C, 1},
		{0x0B3E, 0x0B43, 1},
		{0x0B47, 0x0B48, 1},
		{0x0B4B, 0x0B4D, 1},
		{0x0B56, 0x0B57, 1},
		{0x0B66, 0x0B6F, 1},
		{0x0B82, 0x0B83, 1},
		{0x0BBE, 0x0BC2, 1},
		{0x0BC6, 0x0BC8, 1},
		{0x0BCA, 0x0BCD, 1},
		{0x0BD7, 0x0BD7, 1},
		{0x0BE7, 0x0BEF, 1},
		{0x0C01, 0x0C03, 1},
		{0x0C3E, 0x0C44, 1},
		{0x0C46, 0x0C48, 1},
		{0x0C4A, 0x0C4D, 1},
		{0x0C55, 0x0C56, 1},
		{0x0C66, 0x0C6F, 1},
		{0x0C82, 0x0C83, 1},
		{0x0CBE, 0x0CC4, 1},
		{0x0CC6, 0x0CC8, 1},
		{0x0CCA, 0x0CCD, 1},
		{0x0CD5, 0x0CD6, 1},
		{0x0CE6, 0x0CEF, 1},
		{0x0D02, 0x0D03, 1},
		{0x0D3E, 0x0D43, 1},
		{0x0D46, 0x0D48, 1},
		{0x0D4A, 0x0D4D, 1},
		{0x0D57, 0x0D57, 1},
		{0x0D66, 0x0D6F, 1},
		{0x0E31, 0x0E31, 1},
		{0x0E34, 0x0E3A, 1},
		{0x0E46, 0x0E46, 1},
		{0x0E47, 0x0E4E, 1},
		{0x0E50, 0x0E59, 1},
		{0x0EB1, 0x0EB1, 1},
		{0x0EB4, 0x0EB9, 1},
		{0x0EBB, 0x0EBC, 1},
		{0x0EC6, 0x0EC6, 1},
		{0x0EC8, 0x0ECD, 1},
		{0x0ED0, 0x0ED9, 1},
		{0x0F18, 0x0F19, 1},
		{0x0F20, 0x0F29, 1},
		{0x0F35, 0x0F39, 2},
		{0x0F3E, 0x0F3F, 1},
		{0x0F71, 0x0F84, 1},
		{0x0F86, 0x0F8B, 1},
		{0x0F90, 0x0F95, 1},
		{0x0F97, 0x0F97, 1},
		{0x0F99, 0x0FAD, 1},
		{0x0FB1, 0x0FB7, 1},
		{0x0FB9, 0x0FB9, 1},
		{0x20D0, 0x20DC, 1},
		{0x20E1, 0x3005, 0x3005 - 0x20E1},
		{0x302A, 0x302F, 1},
		{0x3031, 0x3035, 1},
		{0x3099, 0x309A, 1},
		{0x309D, 0x309E, 1},
		{0x30FC, 0x30FE, 1},
	},
}
1581
// HTMLEntity is an entity map containing translations for the
// standard HTML entity characters.
//
// It refers to the same map as the unexported htmlEntity; no copy is made.
//
// See the Decoder.Strict and Decoder.Entity fields' documentation.
var HTMLEntity map[string]string = htmlEntity
1587
// htmlEntity maps each HTML 4 named character entity (the name only,
// without '&' and ';') to its replacement text, written as a single
// \uXXXX escape. It is the table exported as HTMLEntity.
var htmlEntity = map[string]string{
	// The commented-out pipeline below records how the entries were
	// generated from the W3C HTML 4 entity list; it is kept for
	// reference only and is not executed.
	/*
		hget http://www.w3.org/TR/html4/sgml/entities.html |
		ssam '
			,y /\&gt;/ x/\&lt;(.|\n)+/ s/\n/ /g
			,x v/^\&lt;!ENTITY/d
			,s/\&lt;!ENTITY ([^ ]+) .*U\+([0-9A-F][0-9A-F][0-9A-F][0-9A-F]) .+/     "\1": "\\u\2",/g
		'
	*/
	"nbsp":     "\u00A0",
	"iexcl":    "\u00A1",
	"cent":     "\u00A2",
	"pound":    "\u00A3",
	"curren":   "\u00A4",
	"yen":      "\u00A5",
	"brvbar":   "\u00A6",
	"sect":     "\u00A7",
	"uml":      "\u00A8",
	"copy":     "\u00A9",
	"ordf":     "\u00AA",
	"laquo":    "\u00AB",
	"not":      "\u00AC",
	"shy":      "\u00AD",
	"reg":      "\u00AE",
	"macr":     "\u00AF",
	"deg":      "\u00B0",
	"plusmn":   "\u00B1",
	"sup2":     "\u00B2",
	"sup3":     "\u00B3",
	"acute":    "\u00B4",
	"micro":    "\u00B5",
	"para":     "\u00B6",
	"middot":   "\u00B7",
	"cedil":    "\u00B8",
	"sup1":     "\u00B9",
	"ordm":     "\u00BA",
	"raquo":    "\u00BB",
	"frac14":   "\u00BC",
	"frac12":   "\u00BD",
	"frac34":   "\u00BE",
	"iquest":   "\u00BF",
	"Agrave":   "\u00C0",
	"Aacute":   "\u00C1",
	"Acirc":    "\u00C2",
	"Atilde":   "\u00C3",
	"Auml":     "\u00C4",
	"Aring":    "\u00C5",
	"AElig":    "\u00C6",
	"Ccedil":   "\u00C7",
	"Egrave":   "\u00C8",
	"Eacute":   "\u00C9",
	"Ecirc":    "\u00CA",
	"Euml":     "\u00CB",
	"Igrave":   "\u00CC",
	"Iacute":   "\u00CD",
	"Icirc":    "\u00CE",
	"Iuml":     "\u00CF",
	"ETH":      "\u00D0",
	"Ntilde":   "\u00D1",
	"Ograve":   "\u00D2",
	"Oacute":   "\u00D3",
	"Ocirc":    "\u00D4",
	"Otilde":   "\u00D5",
	"Ouml":     "\u00D6",
	"times":    "\u00D7",
	"Oslash":   "\u00D8",
	"Ugrave":   "\u00D9",
	"Uacute":   "\u00DA",
	"Ucirc":    "\u00DB",
	"Uuml":     "\u00DC",
	"Yacute":   "\u00DD",
	"THORN":    "\u00DE",
	"szlig":    "\u00DF",
	"agrave":   "\u00E0",
	"aacute":   "\u00E1",
	"acirc":    "\u00E2",
	"atilde":   "\u00E3",
	"auml":     "\u00E4",
	"aring":    "\u00E5",
	"aelig":    "\u00E6",
	"ccedil":   "\u00E7",
	"egrave":   "\u00E8",
	"eacute":   "\u00E9",
	"ecirc":    "\u00EA",
	"euml":     "\u00EB",
	"igrave":   "\u00EC",
	"iacute":   "\u00ED",
	"icirc":    "\u00EE",
	"iuml":     "\u00EF",
	"eth":      "\u00F0",
	"ntilde":   "\u00F1",
	"ograve":   "\u00F2",
	"oacute":   "\u00F3",
	"ocirc":    "\u00F4",
	"otilde":   "\u00F5",
	"ouml":     "\u00F6",
	"divide":   "\u00F7",
	"oslash":   "\u00F8",
	"ugrave":   "\u00F9",
	"uacute":   "\u00FA",
	"ucirc":    "\u00FB",
	"uuml":     "\u00FC",
	"yacute":   "\u00FD",
	"thorn":    "\u00FE",
	"yuml":     "\u00FF",
	"fnof":     "\u0192",
	"Alpha":    "\u0391",
	"Beta":     "\u0392",
	"Gamma":    "\u0393",
	"Delta":    "\u0394",
	"Epsilon":  "\u0395",
	"Zeta":     "\u0396",
	"Eta":      "\u0397",
	"Theta":    "\u0398",
	"Iota":     "\u0399",
	"Kappa":    "\u039A",
	"Lambda":   "\u039B",
	"Mu":       "\u039C",
	"Nu":       "\u039D",
	"Xi":       "\u039E",
	"Omicron":  "\u039F",
	"Pi":       "\u03A0",
	"Rho":      "\u03A1",
	"Sigma":    "\u03A3",
	"Tau":      "\u03A4",
	"Upsilon":  "\u03A5",
	"Phi":      "\u03A6",
	"Chi":      "\u03A7",
	"Psi":      "\u03A8",
	"Omega":    "\u03A9",
	"alpha":    "\u03B1",
	"beta":     "\u03B2",
	"gamma":    "\u03B3",
	"delta":    "\u03B4",
	"epsilon":  "\u03B5",
	"zeta":     "\u03B6",
	"eta":      "\u03B7",
	"theta":    "\u03B8",
	"iota":     "\u03B9",
	"kappa":    "\u03BA",
	"lambda":   "\u03BB",
	"mu":       "\u03BC",
	"nu":       "\u03BD",
	"xi":       "\u03BE",
	"omicron":  "\u03BF",
	"pi":       "\u03C0",
	"rho":      "\u03C1",
	"sigmaf":   "\u03C2",
	"sigma":    "\u03C3",
	"tau":      "\u03C4",
	"upsilon":  "\u03C5",
	"phi":      "\u03C6",
	"chi":      "\u03C7",
	"psi":      "\u03C8",
	"omega":    "\u03C9",
	"thetasym": "\u03D1",
	"upsih":    "\u03D2",
	"piv":      "\u03D6",
	"bull":     "\u2022",
	"hellip":   "\u2026",
	"prime":    "\u2032",
	"Prime":    "\u2033",
	"oline":    "\u203E",
	"frasl":    "\u2044",
	"weierp":   "\u2118",
	"image":    "\u2111",
	"real":     "\u211C",
	"trade":    "\u2122",
	"alefsym":  "\u2135",
	"larr":     "\u2190",
	"uarr":     "\u2191",
	"rarr":     "\u2192",
	"darr":     "\u2193",
	"harr":     "\u2194",
	"crarr":    "\u21B5",
	"lArr":     "\u21D0",
	"uArr":     "\u21D1",
	"rArr":     "\u21D2",
	"dArr":     "\u21D3",
	"hArr":     "\u21D4",
	"forall":   "\u2200",
	"part":     "\u2202",
	"exist":    "\u2203",
	"empty":    "\u2205",
	"nabla":    "\u2207",
	"isin":     "\u2208",
	"notin":    "\u2209",
	"ni":       "\u220B",
	"prod":     "\u220F",
	"sum":      "\u2211",
	"minus":    "\u2212",
	"lowast":   "\u2217",
	"radic":    "\u221A",
	"prop":     "\u221D",
	"infin":    "\u221E",
	"ang":      "\u2220",
	"and":      "\u2227",
	"or":       "\u2228",
	"cap":      "\u2229",
	"cup":      "\u222A",
	"int":      "\u222B",
	"there4":   "\u2234",
	"sim":      "\u223C",
	"cong":     "\u2245",
	"asymp":    "\u2248",
	"ne":       "\u2260",
	"equiv":    "\u2261",
	"le":       "\u2264",
	"ge":       "\u2265",
	"sub":      "\u2282",
	"sup":      "\u2283",
	"nsub":     "\u2284",
	"sube":     "\u2286",
	"supe":     "\u2287",
	"oplus":    "\u2295",
	"otimes":   "\u2297",
	"perp":     "\u22A5",
	"sdot":     "\u22C5",
	"lceil":    "\u2308",
	"rceil":    "\u2309",
	"lfloor":   "\u230A",
	"rfloor":   "\u230B",
	"lang":     "\u2329",
	"rang":     "\u232A",
	"loz":      "\u25CA",
	"spades":   "\u2660",
	"clubs":    "\u2663",
	"hearts":   "\u2665",
	"diams":    "\u2666",
	"quot":     "\u0022",
	"amp":      "\u0026",
	"lt":       "\u003C",
	"gt":       "\u003E",
	"OElig":    "\u0152",
	"oelig":    "\u0153",
	"Scaron":   "\u0160",
	"scaron":   "\u0161",
	"Yuml":     "\u0178",
	"circ":     "\u02C6",
	"tilde":    "\u02DC",
	"ensp":     "\u2002",
	"emsp":     "\u2003",
	"thinsp":   "\u2009",
	"zwnj":     "\u200C",
	"zwj":      "\u200D",
	"lrm":      "\u200E",
	"rlm":      "\u200F",
	"ndash":    "\u2013",
	"mdash":    "\u2014",
	"lsquo":    "\u2018",
	"rsquo":    "\u2019",
	"sbquo":    "\u201A",
	"ldquo":    "\u201C",
	"rdquo":    "\u201D",
	"bdquo":    "\u201E",
	"dagger":   "\u2020",
	"Dagger":   "\u2021",
	"permil":   "\u2030",
	"lsaquo":   "\u2039",
	"rsaquo":   "\u203A",
	"euro":     "\u20AC",
}
1850
// HTMLAutoClose is the set of HTML elements that
// should be considered to close automatically.
// The element names are given in lowercase.
//
// See the Decoder.Strict and Decoder.AutoClose fields' documentation.
var HTMLAutoClose []string = htmlAutoClose
1856
// htmlAutoClose lists, in lowercase, the HTML element names that are
// exported as HTMLAutoClose.
var htmlAutoClose = []string{
	// The commented-out pipeline below records how the list was
	// generated: it extracts the elements declared "- O EMPTY" in the
	// HTML 4 loose DTD and lowercases their names. It is kept for
	// reference only and is not executed.
	/*
		hget http://www.w3.org/TR/html4/loose.dtd |
		9 sed -n 's/<!ELEMENT ([^ ]*) +- O EMPTY.+/     "\1",/p' | tr A-Z a-z
	*/
	"basefont",
	"br",
	"area",
	"link",
	"img",
	"param",
	"hr",
	"input",
	"col",
	"frame",
	"isindex",
	"base",
	"meta",
}
1876
// Replacement byte sequences written by escapeText and
// (*printer).EscapeString in place of characters that must not
// appear literally in escaped XML text.
var (
	escQuot = []byte("&#34;") // for '"'; shorter than "&quot;"
	escApos = []byte("&#39;") // for '\''; shorter than "&apos;"
	escAmp  = []byte("&amp;") // for '&'
	escLT   = []byte("&lt;")  // for '<'
	escGT   = []byte("&gt;")  // for '>'
	escTab  = []byte("&#x9;") // for '\t'
	escNL   = []byte("&#xA;") // for '\n'
	escCR   = []byte("&#xD;") // for '\r'
	escFFFD = []byte("\uFFFD") // Unicode replacement character
)
1888
1889 // EscapeText writes to w the properly escaped XML equivalent
1890 // of the plain text data s.
1891 func EscapeText(w io.Writer, s []byte) error {
1892         return escapeText(w, s, true)
1893 }
1894
1895 // escapeText writes to w the properly escaped XML equivalent
1896 // of the plain text data s. If escapeNewline is true, newline
1897 // characters will be escaped.
1898 func escapeText(w io.Writer, s []byte, escapeNewline bool) error {
1899         var esc []byte
1900         last := 0
1901         for i := 0; i < len(s); {
1902                 r, width := utf8.DecodeRune(s[i:])
1903                 i += width
1904                 switch r {
1905                 case '"':
1906                         esc = escQuot
1907                 case '\'':
1908                         esc = escApos
1909                 case '&':
1910                         esc = escAmp
1911                 case '<':
1912                         esc = escLT
1913                 case '>':
1914                         esc = escGT
1915                 case '\t':
1916                         esc = escTab
1917                 case '\n':
1918                         if !escapeNewline {
1919                                 continue
1920                         }
1921                         esc = escNL
1922                 case '\r':
1923                         esc = escCR
1924                 default:
1925                         if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
1926                                 esc = escFFFD
1927                                 break
1928                         }
1929                         continue
1930                 }
1931                 if _, err := w.Write(s[last : i-width]); err != nil {
1932                         return err
1933                 }
1934                 if _, err := w.Write(esc); err != nil {
1935                         return err
1936                 }
1937                 last = i
1938         }
1939         _, err := w.Write(s[last:])
1940         return err
1941 }
1942
1943 // EscapeString writes to p the properly escaped XML equivalent
1944 // of the plain text data s.
1945 func (p *printer) EscapeString(s string) {
1946         var esc []byte
1947         last := 0
1948         for i := 0; i < len(s); {
1949                 r, width := utf8.DecodeRuneInString(s[i:])
1950                 i += width
1951                 switch r {
1952                 case '"':
1953                         esc = escQuot
1954                 case '\'':
1955                         esc = escApos
1956                 case '&':
1957                         esc = escAmp
1958                 case '<':
1959                         esc = escLT
1960                 case '>':
1961                         esc = escGT
1962                 case '\t':
1963                         esc = escTab
1964                 case '\n':
1965                         esc = escNL
1966                 case '\r':
1967                         esc = escCR
1968                 default:
1969                         if !isInCharacterRange(r) || (r == 0xFFFD && width == 1) {
1970                                 esc = escFFFD
1971                                 break
1972                         }
1973                         continue
1974                 }
1975                 p.WriteString(s[last : i-width])
1976                 p.Write(esc)
1977                 last = i
1978         }
1979         p.WriteString(s[last:])
1980 }
1981
1982 // Escape is like EscapeText but omits the error return value.
1983 // It is provided for backwards compatibility with Go 1.0.
1984 // Code targeting Go 1.1 or later should use EscapeText.
1985 func Escape(w io.Writer, s []byte) {
1986         EscapeText(w, s)
1987 }
1988
// Byte sequences used when emitting CDATA sections.
var (
	cdataStart = []byte("<![CDATA[")
	cdataEnd   = []byte("]]>")
	// cdataEscape replaces a "]]>" found inside the data: it ends the
	// current section after "]]" and reopens a new one before ">".
	cdataEscape = []byte("]]]]><![CDATA[>")
)

// emitCDATA writes to w the CDATA-wrapped plain text data s.
// Every "]]>" occurring in s is split across two adjacent CDATA
// sections so that it cannot terminate the section early.
// Nothing is written when s is empty.
// It returns the first error reported by w, if any.
func emitCDATA(w io.Writer, s []byte) error {
	if len(s) == 0 {
		return nil
	}
	if _, err := w.Write(cdataStart); err != nil {
		return err
	}

	rest := s
	for end := bytes.Index(rest, cdataEnd); end >= 0; end = bytes.Index(rest, cdataEnd) {
		// Copy the text before the terminator, then its escaped form.
		if _, err := w.Write(rest[:end]); err != nil {
			return err
		}
		if _, err := w.Write(cdataEscape); err != nil {
			return err
		}
		rest = rest[end+len(cdataEnd):]
	}

	// No terminator remains; write the tail and close the section.
	if _, err := w.Write(rest); err != nil {
		return err
	}
	_, err := w.Write(cdataEnd)
	return err
}
2026
// procInst parses the `param="..."` or `param='...'`
// value out of the provided string, returning "" if not found.
//
// The string is scanned for occurrences of param immediately followed
// by '='. An occurrence whose value does not start with a single or
// double quote is skipped and the scan continues after it, so a later,
// properly quoted occurrence is still found. The first quoted occurrence
// decides the result: its value is returned without the quotes, or ""
// is returned if the closing quote is missing.
func procInst(param, s string) string {
	// TODO: this parsing is somewhat lame and not exact.
	// It works for all actual cases, though.
	key := param + "="
	rest := s
	for {
		idx := strings.Index(rest, key)
		if idx < 0 {
			return ""
		}
		v := rest[idx+len(key):]
		if v == "" {
			return ""
		}
		if quote := v[0]; quote == '\'' || quote == '"' {
			end := strings.IndexByte(v[1:], quote)
			if end < 0 {
				return ""
			}
			return v[1 : 1+end]
		}
		// Unquoted value: ignore this occurrence and keep looking.
		// v is strictly shorter than rest, so the loop terminates.
		rest = v
	}
}