libphobos/src/std/xml.d

   1 // Written in the D programming language.
   2
   3 /**
   4 $(RED Warning: This module is considered out-dated and not up to Phobos'
   5       current standards. It will remain until we have a suitable replacement,
   6       but be aware that it will not remain long term.)
   7
   8 Classes and functions for creating and parsing XML
   9
  10 The basic architecture of this module is that there are standalone functions,
  11 classes for constructing an XML document from scratch (Tag, Element and
  12 Document), and also classes for parsing a pre-existing XML file (ElementParser
  13 and DocumentParser). The parsing classes <i>may</i> be used to build a
  14 Document, but that is not their primary purpose. The handling capabilities of
  15 DocumentParser and ElementParser are sufficiently customizable that you can
  16 make them do pretty much whatever you want.
  17
  18 Example: This example creates a DOM (Document Object Model) tree
  19     from an XML file.
  20 ------------------------------------------------------------------------------
  21 import std.xml;
  22 import std.stdio;
  23 import std.string;
  24 import std.file;
  25
  26 // books.xml is used in various samples throughout the Microsoft XML Core
  27 // Services (MSXML) SDK.
  28 //
  29 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
  30
  31 void main()
  32 {
  33     string s = cast(string) std.file.read("books.xml");
  34
  35     // Check for well-formedness
  36     check(s);
  37
  38     // Make a DOM tree
  39     auto doc = new Document(s);
  40
  41     // Plain-print it
  42     writeln(doc);
  43 }
  44 ------------------------------------------------------------------------------
  45
  46 Example: This example does much the same thing, except that the file is
  47     deconstructed and reconstructed by hand. This is more work, but the
  48     techniques involved offer vastly more power.
  49 ------------------------------------------------------------------------------
  50 import std.xml;
  51 import std.stdio;
import std.string;
import std.file;
  53
  54 struct Book
  55 {
  56     string id;
  57     string author;
  58     string title;
  59     string genre;
  60     string price;
  61     string pubDate;
  62     string description;
  63 }
  64
  65 void main()
  66 {
  67     string s = cast(string) std.file.read("books.xml");
  68
  69     // Check for well-formedness
  70     check(s);
  71
  72     // Take it apart
  73     Book[] books;
  74
  75     auto xml = new DocumentParser(s);
  76     xml.onStartTag["book"] = (ElementParser xml)
  77     {
  78         Book book;
  79         book.id = xml.tag.attr["id"];
  80
  81         xml.onEndTag["author"]       = (in Element e) { book.author      = e.text(); };
  82         xml.onEndTag["title"]        = (in Element e) { book.title       = e.text(); };
  83         xml.onEndTag["genre"]        = (in Element e) { book.genre       = e.text(); };
  84         xml.onEndTag["price"]        = (in Element e) { book.price       = e.text(); };
  85         xml.onEndTag["publish-date"] = (in Element e) { book.pubDate     = e.text(); };
  86         xml.onEndTag["description"]  = (in Element e) { book.description = e.text(); };
  87
  88         xml.parse();
  89
  90         books ~= book;
  91     };
  92     xml.parse();
  93
  94     // Put it back together again;
  95     auto doc = new Document(new Tag("catalog"));
  96     foreach (book;books)
  97     {
  98         auto element = new Element("book");
  99         element.tag.attr["id"] = book.id;
 100
 101         element ~= new Element("author",      book.author);
 102         element ~= new Element("title",       book.title);
 103         element ~= new Element("genre",       book.genre);
 104         element ~= new Element("price",       book.price);
 105         element ~= new Element("publish-date",book.pubDate);
 106         element ~= new Element("description", book.description);
 107
 108         doc ~= element;
 109     }
 110
 111     // Pretty-print it
    writeln(join(doc.pretty(3),"\n"));
 113 }
 114 -------------------------------------------------------------------------------
 115 Copyright: Copyright Janice Caron 2008 - 2009.
 116 License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
 117 Authors:   Janice Caron
 118 Source:    $(PHOBOSSRC std/_xml.d)
 119 */
 120 /*
 121          Copyright Janice Caron 2008 - 2009.
 122 Distributed under the Boost Software License, Version 1.0.
 123    (See accompanying file LICENSE_1_0.txt or copy at
 124          http://www.boost.org/LICENSE_1_0.txt)
 125 */
 126 module std.xml;
 127
 128 enum cdata = "<![CDATA[";
 129
 130 /**
 131  * Returns true if the character is a character according to the XML standard
 132  *
 133  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 134  *
 135  * Params:
 136  *    c = the character to be tested
 137  */
 138 bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
 139 {
 140     if (c <= 0xD7FF)
 141     {
 142         if (c >= 0x20)
 143             return true;
 144         switch (c)
 145         {
 146         case 0xA:
 147         case 0x9:
 148         case 0xD:
 149             return true;
 150         default:
 151             return false;
 152         }
 153     }
 154     else if (0xE000 <= c && c <= 0x10FFFF)
 155     {
 156         if ((c & 0x1FFFFE) != 0xFFFE) // U+FFFE and U+FFFF
 157             return true;
 158     }
 159     return false;
 160 }
 161
 162 @safe @nogc nothrow pure unittest
 163 {
 164     assert(!isChar(cast(dchar) 0x8));
 165     assert( isChar(cast(dchar) 0x9));
 166     assert( isChar(cast(dchar) 0xA));
 167     assert(!isChar(cast(dchar) 0xB));
 168     assert(!isChar(cast(dchar) 0xC));
 169     assert( isChar(cast(dchar) 0xD));
 170     assert(!isChar(cast(dchar) 0xE));
 171     assert(!isChar(cast(dchar) 0x1F));
 172     assert( isChar(cast(dchar) 0x20));
 173     assert( isChar('J'));
 174     assert( isChar(cast(dchar) 0xD7FF));
 175     assert(!isChar(cast(dchar) 0xD800));
 176     assert(!isChar(cast(dchar) 0xDFFF));
 177     assert( isChar(cast(dchar) 0xE000));
 178     assert( isChar(cast(dchar) 0xFFFD));
 179     assert(!isChar(cast(dchar) 0xFFFE));
 180     assert(!isChar(cast(dchar) 0xFFFF));
 181     assert( isChar(cast(dchar) 0x10000));
 182     assert( isChar(cast(dchar) 0x10FFFF));
 183     assert(!isChar(cast(dchar) 0x110000));
 184
 185     debug (stdxml_TestHardcodedChecks)
 186     {
 187         foreach (c; 0 .. dchar.max + 1)
 188             assert(isChar(c) == lookup(CharTable, c));
 189     }
 190 }
 191
 192 /**
 193  * Returns true if the character is whitespace according to the XML standard
 194  *
 195  * Only the following characters are considered whitespace in XML - space, tab,
 196  * carriage return and linefeed
 197  *
 198  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 199  *
 200  * Params:
 201  *    c = the character to be tested
 202  */
 203 bool isSpace(dchar c) @safe @nogc pure nothrow
 204 {
 205     return c == '\u0020' || c == '\u0009' || c == '\u000A' || c == '\u000D';
 206 }
 207
 208 /**
 209  * Returns true if the character is a digit according to the XML standard
 210  *
 211  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 212  *
 213  * Params:
 214  *    c = the character to be tested
 215  */
 216 bool isDigit(dchar c) @safe @nogc pure nothrow
 217 {
 218     if (c <= 0x0039 && c >= 0x0030)
 219         return true;
 220     else
 221         return lookup(DigitTable,c);
 222 }
 223
 224 @safe @nogc nothrow pure unittest
 225 {
 226     debug (stdxml_TestHardcodedChecks)
 227     {
 228         foreach (c; 0 .. dchar.max + 1)
 229             assert(isDigit(c) == lookup(DigitTable, c));
 230     }
 231 }
 232
 233 /**
 234  * Returns true if the character is a letter according to the XML standard
 235  *
 236  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 237  *
 238  * Params:
 239  *    c = the character to be tested
 240  */
 241 bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
 242 {
 243     return isIdeographic(c) || isBaseChar(c);
 244 }
 245
 246 /**
 247  * Returns true if the character is an ideographic character according to the
 248  * XML standard
 249  *
 250  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 251  *
 252  * Params:
 253  *    c = the character to be tested
 254  */
 255 bool isIdeographic(dchar c) @safe @nogc nothrow pure
 256 {
 257     if (c == 0x3007)
 258         return true;
 259     if (c <= 0x3029 && c >= 0x3021 )
 260         return true;
 261     if (c <= 0x9FA5 && c >= 0x4E00)
 262         return true;
 263     return false;
 264 }
 265
 266 @safe @nogc nothrow pure unittest
 267 {
 268     assert(isIdeographic('\u4E00'));
 269     assert(isIdeographic('\u9FA5'));
 270     assert(isIdeographic('\u3007'));
 271     assert(isIdeographic('\u3021'));
 272     assert(isIdeographic('\u3029'));
 273
 274     debug (stdxml_TestHardcodedChecks)
 275     {
 276         foreach (c; 0 .. dchar.max + 1)
 277             assert(isIdeographic(c) == lookup(IdeographicTable, c));
 278     }
 279 }
 280
 281 /**
 282  * Returns true if the character is a base character according to the XML
 283  * standard
 284  *
 285  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 286  *
 287  * Params:
 288  *    c = the character to be tested
 289  */
 290 bool isBaseChar(dchar c) @safe @nogc nothrow pure
 291 {
 292     return lookup(BaseCharTable,c);
 293 }
 294
 295 /**
 296  * Returns true if the character is a combining character according to the
 297  * XML standard
 298  *
 299  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 300  *
 301  * Params:
 302  *    c = the character to be tested
 303  */
 304 bool isCombiningChar(dchar c) @safe @nogc nothrow pure
 305 {
 306     return lookup(CombiningCharTable,c);
 307 }
 308
 309 /**
 310  * Returns true if the character is an extender according to the XML standard
 311  *
 312  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 313  *
 314  * Params:
 315  *    c = the character to be tested
 316  */
 317 bool isExtender(dchar c) @safe @nogc nothrow pure
 318 {
 319     return lookup(ExtenderTable,c);
 320 }
 321
 322 /**
 323  * Encodes a string by replacing all characters which need to be escaped with
 324  * appropriate predefined XML entities.
 325  *
 326  * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 327  * and greater-than), and similarly, decode() unescapes them. These functions
 328  * are provided for convenience only. You do not need to use them when using
 329  * the std.xml classes, because then all the encoding and decoding will be done
 330  * for you automatically.
 331  *
 332  * If the string is not modified, the original will be returned.
 333  *
 334  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 335  *
 336  * Params:
 337  *      s = The string to be encoded
 338  *
 339  * Returns: The encoded string
 340  *
 341  * Example:
 342  * --------------
 343  * writefln(encode("a > b")); // writes "a &gt; b"
 344  * --------------
 345  */
 346 S encode(S)(S s)
 347 {
 348     import std.array : appender;
 349
 350     string r;
 351     size_t lastI;
 352     auto result = appender!S();
 353
 354     foreach (i, c; s)
 355     {
 356         switch (c)
 357         {
 358         case '&':  r = "&amp;"; break;
 359         case '"':  r = "&quot;"; break;
 360         case '\'': r = "&apos;"; break;
 361         case '<':  r = "&lt;"; break;
 362         case '>':  r = "&gt;"; break;
 363         default: continue;
 364         }
 365         // Replace with r
 366         result.put(s[lastI .. i]);
 367         result.put(r);
 368         lastI = i + 1;
 369     }
 370
 371     if (!result.data.ptr) return s;
 372     result.put(s[lastI .. $]);
 373     return result.data;
 374 }
 375
@safe pure unittest
{
    // A string with nothing to escape must come back as the same slice.
    auto s = "hello";
    assert(encode(s) is s);
    // One case per escaped character; where a second argument is given,
    // it is the assert message and shows the actual output on failure.
    assert(encode("a > b") == "a &gt; b", encode("a > b"));
    assert(encode("a < b") == "a &lt; b");
    assert(encode("don't") == "don&apos;t");
    assert(encode("\"hi\"") == "&quot;hi&quot;", encode("\"hi\""));
    assert(encode("cat & dog") == "cat &amp; dog");
}
 386
 387 /**
 388  * Mode to use for decoding.
 389  *
 390  * $(DDOC_ENUM_MEMBERS NONE) Do not decode
 391  * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors
 392  * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error
 393  */
 394 enum DecodeMode
 395 {
 396     NONE, LOOSE, STRICT
 397 }
 398
/**
 * Decodes a string by unescaping all predefined XML entities.
 *
 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
 * and greater-than), and similarly, decode() unescapes them. These functions
 * are provided for convenience only. You do not need to use them when using
 * the std.xml classes, because then all the encoding and decoding will be done
 * for you automatically.
 *
 * This function decodes the entities &amp;amp;, &amp;quot;, &amp;apos;,
 * &amp;lt; and &amp;gt;,
 * as well as decimal and hexadecimal entities such as &amp;#x20AC;
 *
 * If the string does not contain an ampersand, the original will be returned.
 *
 * Note that the "mode" parameter can be one of DecodeMode.NONE (do not
 * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT
 * (decode, and throw a DecodeException in the event of an error).
 * In LOOSE mode an ampersand that does not start a recognised entity is
 * copied to the output unchanged.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Example:
 * --------------
 * writeln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    if (mode == DecodeMode.NONE) return s;

    // Stays empty until the first '&' is seen; an empty buffer at the end
    // therefore means "no ampersand found, return the input itself".
    string buffer;
    // The index is taken by ref so that the branches below can skip over
    // the remainder of an entity they have just consumed.
    foreach (ref i; 0 .. s.length)
    {
        char c = s[i];
        if (c != '&')
        {
            // Only copy once a buffer has been started.
            if (buffer.length != 0) buffer ~= c;
        }
        else
        {
            if (buffer.length == 0)
            {
                // First '&': seed the buffer with everything before it.
                buffer = s[0 .. i].dup;
            }
            if (startsWith(s[i..$],"&#"))
            {
                try
                {
                    dchar d;
                    string t = s[i..$];
                    // NOTE(review): checkCharRef is defined elsewhere; the
                    // index arithmetic below implies it advances t past the
                    // reference and stores the code point in d - confirm.
                    checkCharRef(t, d);
                    char[4] temp;
                    import std.utf : encode;
                    // Append d as UTF-8; encode returns the unit count.
                    buffer ~= temp[0 .. encode(temp, d)];
                    // Position i on the last consumed code unit; the loop's
                    // own increment then moves to the first unread one.
                    i = s.length - t.length - 1;
                }
                catch (Err e)
                {
                    if (mode == DecodeMode.STRICT)
                        throw new DecodeException("Unescaped &");
                    // LOOSE: keep the '&' literally and continue after it.
                    buffer ~= '&';
                }
            }
            // Named entities: each increment is the entity length minus one,
            // because the loop itself advances i by one more.
            else if (startsWith(s[i..$],"&amp;" )) { buffer ~= '&';  i += 4; }
            else if (startsWith(s[i..$],"&quot;")) { buffer ~= '"';  i += 5; }
            else if (startsWith(s[i..$],"&apos;")) { buffer ~= '\''; i += 5; }
            else if (startsWith(s[i..$],"&lt;"  )) { buffer ~= '<';  i += 3; }
            else if (startsWith(s[i..$],"&gt;"  )) { buffer ~= '>';  i += 3; }
            else
            {
                // A bare '&' that starts no recognised entity.
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                buffer ~= '&';
            }
        }
    }
    return (buffer.length == 0) ? s : buffer;
}
 487
@safe pure unittest
{
    // Helper: passes only if STRICT decoding of s throws DecodeException.
    void assertNot(string s) pure
    {
        bool b = false;
        try { decode(s,DecodeMode.STRICT); }
        catch (DecodeException e) { b = true; }
        assert(b,s);
    }

    // Assert that things that should work, do
    auto s = "hello";
    // No ampersand: the identical slice must be returned.
    assert(decode(s,                DecodeMode.STRICT) is s);
    assert(decode("a &gt; b",       DecodeMode.STRICT) == "a > b");
    assert(decode("a &lt; b",       DecodeMode.STRICT) == "a < b");
    assert(decode("don&apos;t",     DecodeMode.STRICT) == "don't");
    assert(decode("&quot;hi&quot;", DecodeMode.STRICT) == "\"hi\"");
    assert(decode("cat &amp; dog",  DecodeMode.STRICT) == "cat & dog");
    // Decimal and hexadecimal character references.
    assert(decode("&#42;",          DecodeMode.STRICT) == "*");
    assert(decode("&#x2A;",         DecodeMode.STRICT) == "*");
    // LOOSE mode leaves malformed input unchanged.
    assert(decode("cat & dog",      DecodeMode.LOOSE) == "cat & dog");
    assert(decode("a &gt b",        DecodeMode.LOOSE) == "a &gt b");
    assert(decode("&#;",            DecodeMode.LOOSE) == "&#;");
    assert(decode("&#x;",           DecodeMode.LOOSE) == "&#x;");
    assert(decode("&#2G;",          DecodeMode.LOOSE) == "&#2G;");
    assert(decode("&#x2G;",         DecodeMode.LOOSE) == "&#x2G;");

    // Assert that things that shouldn't work, don't
    assertNot("cat & dog");
    assertNot("a &gt b");
    assertNot("&#;");
    assertNot("&#x;");
    assertNot("&#2G;");
    assertNot("&#x2G;");
}
 523
 524 /**
 525  * Class representing an XML document.
 526  *
 527  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 528  *
 529  */
 530 class Document : Element
 531 {
 532     /**
 533      * Contains all text which occurs before the root element.
 534      * Defaults to &lt;?xml version="1.0"?&gt;
 535      */
 536     string prolog = "<?xml version=\"1.0\"?>";
 537     /**
 538      * Contains all text which occurs after the root element.
 539      * Defaults to the empty string
 540      */
 541     string epilog;
 542
 543     /**
 544      * Constructs a Document by parsing XML text.
 545      *
 546      * This function creates a complete DOM (Document Object Model) tree.
 547      *
 548      * The input to this function MUST be valid XML.
 549      * This is enforced by DocumentParser's in contract.
 550      *
 551      * Params:
 552      *      s = the complete XML text.
 553      */
 554     this(string s)
 555     in
 556     {
 557         assert(s.length != 0);
 558     }
 559     body
 560     {
 561         auto xml = new DocumentParser(s);
 562         string tagString = xml.tag.tagString;
 563
 564         this(xml.tag);
 565         prolog = s[0 .. tagString.ptr - s.ptr];
 566         parse(xml);
 567         epilog = *xml.s;
 568     }
 569
 570     /**
 571      * Constructs a Document from a Tag.
 572      *
 573      * Params:
 574      *      tag = the start tag of the document.
 575      */
 576     this(const(Tag) tag)
 577     {
 578         super(tag);
 579     }
 580
 581     const
 582     {
 583         /**
 584          * Compares two Documents for equality
 585          *
 586          * Example:
 587          * --------------
 588          * Document d1,d2;
 589          * if (d1 == d2) { }
 590          * --------------
 591          */
 592         override bool opEquals(scope const Object o) const
 593         {
 594             const doc = toType!(const Document)(o);
 595             return prolog == doc.prolog
 596                 && (cast(const) this).Element.opEquals(cast(const) doc)
 597                 && epilog == doc.epilog;
 598         }
 599
 600         /**
 601          * Compares two Documents
 602          *
 603          * You should rarely need to call this function. It exists so that
 604          * Documents can be used as associative array keys.
 605          *
 606          * Example:
 607          * --------------
 608          * Document d1,d2;
 609          * if (d1 < d2) { }
 610          * --------------
 611          */
 612         override int opCmp(scope const Object o) scope const
 613         {
 614             const doc = toType!(const Document)(o);
 615             if (prolog != doc.prolog)
 616                 return prolog < doc.prolog ? -1 : 1;
 617             if (int cmp = this.Element.opCmp(doc))
 618                 return cmp;
 619             if (epilog != doc.epilog)
 620                 return epilog < doc.epilog ? -1 : 1;
 621             return 0;
 622         }
 623
 624         /**
 625          * Returns the hash of a Document
 626          *
 627          * You should rarely need to call this function. It exists so that
 628          * Documents can be used as associative array keys.
 629          */
 630         override size_t toHash() scope const @trusted
 631         {
 632             return hash(prolog, hash(epilog, (cast() this).Element.toHash()));
 633         }
 634
 635         /**
 636          * Returns the string representation of a Document. (That is, the
 637          * complete XML of a document).
 638          */
 639         override string toString() scope const @safe
 640         {
 641             return prolog ~ super.toString() ~ epilog;
 642         }
 643     }
 644 }
 645
@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    auto xml = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    // Two documents parsed from the same text must be equal, must not be
    // ordered before one another, and must work as the same AA key.
    auto a = new Document(xml);
    auto b = new Document(xml);
    assert(a == b);
    assert(!(a < b));
    int[Document] aa;
    aa[a] = 1;
    assert(aa[b] == 1);

    // Giving b an extra child makes it compare greater than a.
    b ~= new Element("b");
    assert(a < b);
    assert(b > a);
}
 663
 664 /**
 665  * Class representing an XML element.
 666  *
 667  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 668  */
 669 class Element : Item
 670 {
 671     Tag tag; /// The start tag of the element
 672     Item[] items; /// The element's items
 673     Text[] texts; /// The element's text items
 674     CData[] cdatas; /// The element's CData items
 675     Comment[] comments; /// The element's comments
 676     ProcessingInstruction[] pis; /// The element's processing instructions
 677     Element[] elements; /// The element's child elements
 678
 679     /**
 680      * Constructs an Element given a name and a string to be used as a Text
 681      * interior.
 682      *
 683      * Params:
 684      *      name = the name of the element.
 685      *      interior = (optional) the string interior.
 686      *
 687      * Example:
 688      * -------------------------------------------------------
 689      * auto element = new Element("title","Serenity")
 690      *     // constructs the element <title>Serenity</title>
 691      * -------------------------------------------------------
 692      */
 693     this(string name, string interior=null) @safe pure
 694     {
 695         this(new Tag(name));
 696         if (interior.length != 0) opCatAssign(new Text(interior));
 697     }
 698
 699     /**
 700      * Constructs an Element from a Tag.
 701      *
 702      * Params:
 703      *      tag_ = the start or empty tag of the element.
 704      */
 705     this(const(Tag) tag_) @safe pure
 706     {
 707         this.tag = new Tag(tag_.name);
 708         tag.type = TagType.EMPTY;
 709         foreach (k,v;tag_.attr) tag.attr[k] = v;
 710         tag.tagString = tag_.tagString;
 711     }
 712
 713     /**
 714      * Append a text item to the interior of this element
 715      *
 716      * Params:
 717      *      item = the item you wish to append.
 718      *
 719      * Example:
 720      * --------------
 721      * Element element;
 722      * element ~= new Text("hello");
 723      * --------------
 724      */
 725     void opCatAssign(Text item) @safe pure
 726     {
 727         texts ~= item;
 728         appendItem(item);
 729     }
 730
 731     /**
 732      * Append a CData item to the interior of this element
 733      *
 734      * Params:
 735      *      item = the item you wish to append.
 736      *
 737      * Example:
 738      * --------------
 739      * Element element;
 740      * element ~= new CData("hello");
 741      * --------------
 742      */
 743     void opCatAssign(CData item) @safe pure
 744     {
 745         cdatas ~= item;
 746         appendItem(item);
 747     }
 748
 749     /**
 750      * Append a comment to the interior of this element
 751      *
 752      * Params:
 753      *      item = the item you wish to append.
 754      *
 755      * Example:
 756      * --------------
 757      * Element element;
 758      * element ~= new Comment("hello");
 759      * --------------
 760      */
 761     void opCatAssign(Comment item) @safe pure
 762     {
 763         comments ~= item;
 764         appendItem(item);
 765     }
 766
 767     /**
 768      * Append a processing instruction to the interior of this element
 769      *
 770      * Params:
 771      *      item = the item you wish to append.
 772      *
 773      * Example:
 774      * --------------
 775      * Element element;
 776      * element ~= new ProcessingInstruction("hello");
 777      * --------------
 778      */
 779     void opCatAssign(ProcessingInstruction item) @safe pure
 780     {
 781         pis ~= item;
 782         appendItem(item);
 783     }
 784
 785     /**
 786      * Append a complete element to the interior of this element
 787      *
 788      * Params:
 789      *      item = the item you wish to append.
 790      *
 791      * Example:
 792      * --------------
 793      * Element element;
 794      * Element other = new Element("br");
 795      * element ~= other;
 796      *    // appends element representing <br />
 797      * --------------
 798      */
 799     void opCatAssign(Element item) @safe pure
 800     {
 801         elements ~= item;
 802         appendItem(item);
 803     }
 804
 805     private void appendItem(Item item) @safe pure
 806     {
 807         items ~= item;
 808         if (tag.type == TagType.EMPTY && !item.isEmptyXML)
 809             tag.type = TagType.START;
 810     }
 811
 812     private void parse(ElementParser xml)
 813     {
 814         xml.onText = (string s) { opCatAssign(new Text(s)); };
 815         xml.onCData = (string s) { opCatAssign(new CData(s)); };
 816         xml.onComment = (string s) { opCatAssign(new Comment(s)); };
 817         xml.onPI = (string s) { opCatAssign(new ProcessingInstruction(s)); };
 818
 819         xml.onStartTag[null] = (ElementParser xml)
 820         {
 821             auto e = new Element(xml.tag);
 822             e.parse(xml);
 823             opCatAssign(e);
 824         };
 825
 826         xml.parse();
 827     }
 828
 829     /**
 830      * Compares two Elements for equality
 831      *
 832      * Example:
 833      * --------------
 834      * Element e1,e2;
 835      * if (e1 == e2) { }
 836      * --------------
 837      */
 838     override bool opEquals(scope const Object o) const
 839     {
 840         const element = toType!(const Element)(o);
 841         immutable len = items.length;
 842         if (len != element.items.length) return false;
 843         foreach (i; 0 .. len)
 844         {
 845             if (!items[i].opEquals(element.items[i])) return false;
 846         }
 847         return true;
 848     }
 849
 850     /**
 851      * Compares two Elements
 852      *
 853      * You should rarely need to call this function. It exists so that Elements
 854      * can be used as associative array keys.
 855      *
 856      * Example:
 857      * --------------
 858      * Element e1,e2;
 859      * if (e1 < e2) { }
 860      * --------------
 861      */
 862     override int opCmp(scope const Object o) @safe const
 863     {
 864         const element = toType!(const Element)(o);
 865         for (uint i=0; ; ++i)
 866         {
 867             if (i == items.length && i == element.items.length) return 0;
 868             if (i == items.length) return -1;
 869             if (i == element.items.length) return 1;
 870             if (!items[i].opEquals(element.items[i]))
 871                 return items[i].opCmp(element.items[i]);
 872         }
 873     }
 874
 875     /**
 876      * Returns the hash of an Element
 877      *
 878      * You should rarely need to call this function. It exists so that Elements
 879      * can be used as associative array keys.
 880      */
 881     override size_t toHash() scope const @safe
 882     {
 883         size_t hash = tag.toHash();
 884         foreach (item;items) hash += item.toHash();
 885         return hash;
 886     }
 887
 888     const
 889     {
 890         /**
 891          * Returns the decoded interior of an element.
 892          *
 893          * The element is assumed to contain text <i>only</i>. So, for
 894          * example, given XML such as "&lt;title&gt;Good &amp;amp;
 895          * Bad&lt;/title&gt;", will return "Good &amp; Bad".
 896          *
 897          * Params:
 898          *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 899          *
 900          * Throws: DecodeException if decode fails
 901          */
 902         string text(DecodeMode mode=DecodeMode.LOOSE)
 903         {
 904             string buffer;
 905             foreach (item;items)
 906             {
 907                 Text t = cast(Text) item;
 908                 if (t is null) throw new DecodeException(item.toString());
 909                 buffer ~= decode(t.toString(),mode);
 910             }
 911             return buffer;
 912         }
 913
 914         /**
 915          * Returns an indented string representation of this item
 916          *
 917          * Params:
 918          *      indent = (optional) number of spaces by which to indent this
 919          *          element. Defaults to 2.
 920          */
 921         override string[] pretty(uint indent=2) scope
 922         {
 923             import std.algorithm.searching : count;
 924             import std.string : rightJustify;
 925
 926             if (isEmptyXML) return [ tag.toEmptyString() ];
 927
 928             if (items.length == 1)
 929             {
 930                 auto t = cast(const(Text))(items[0]);
 931                 if (t !is null)
 932                 {
 933                     return [tag.toStartString() ~ t.toString() ~ tag.toEndString()];
 934                 }
 935             }
 936
 937             string[] a = [ tag.toStartString() ];
 938             foreach (item;items)
 939             {
 940                 string[] b = item.pretty(indent);
 941                 foreach (s;b)
 942                 {
 943                     a ~= rightJustify(s,count(s) + indent);
 944                 }
 945             }
 946             a ~= tag.toEndString();
 947             return a;
 948         }
 949
 950         /**
 951          * Returns the string representation of an Element
 952          *
 953          * Example:
 954          * --------------
 955          * auto element = new Element("br");
 956          * writefln(element.toString()); // writes "<br />"
 957          * --------------
 958          */
 959         override string toString() scope @safe
 960         {
 961             if (isEmptyXML) return tag.toEmptyString();
 962
 963             string buffer = tag.toStartString();
 964             foreach (item;items) { buffer ~= item.toString(); }
 965             buffer ~= tag.toEndString();
 966             return buffer;
 967         }
 968
 969         override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; }
 970     }
 971 }
 972
 973 /**
 974  * Tag types.
 975  *
 976  * $(DDOC_ENUM_MEMBERS START) Used for start tags
 977  * $(DDOC_ENUM_MEMBERS END) Used for end tags
 978  * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags
 979  *
 980  */
 981 enum TagType { START, END, EMPTY }
 982
 983 /**
 984  * Class representing an XML tag.
 985  *
 986  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 987  *
 988  * The class invariant guarantees
 989  * <ul>
 990  * <li> that $(B type) is a valid enum TagType value</li>
 991  * <li> that $(B name) consists of valid characters</li>
 992  * <li> that each attribute name consists of valid characters</li>
 993  * </ul>
 994  */
 995 class Tag
 996 {
 997     TagType type = TagType.START;   /// Type of tag
 998     string name;                    /// Tag name
 999     string[string] attr;            /// Associative array of attributes
1000     private string tagString;
1001
1002     invariant()
1003     {
1004         string s;
1005         string t;
1006
1007         assert(type == TagType.START
1008             || type == TagType.END
1009             || type == TagType.EMPTY);
1010
1011         s = name;
1012         try { checkName(s,t); }
1013         catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }
1014
1015         foreach (k,v;attr)
1016         {
1017             s = k;
1018             try { checkName(s,t); }
1019             catch (Err e)
1020                 { assert(false,"Invalid atrribute name:" ~ e.toString()); }
1021         }
1022     }
1023
1024     /**
1025      * Constructs an instance of Tag with a specified name and type
1026      *
1027      * The constructor does not initialize the attributes. To initialize the
1028      * attributes, you access the $(B attr) member variable.
1029      *
1030      * Params:
1031      *      name = the Tag's name
1032      *      type = (optional) the Tag's type. If omitted, defaults to
1033      *          TagType.START.
1034      *
1035      * Example:
1036      * --------------
1037      * auto tag = new Tag("img",Tag.EMPTY);
1038      * tag.attr["src"] = "http://example.com/example.jpg";
1039      * --------------
1040      */
1041     this(string name, TagType type=TagType.START) @safe pure
1042     {
1043         this.name = name;
1044         this.type = type;
1045     }
1046
1047     /* Private constructor (so don't ddoc this!)
1048      *
1049      * Constructs a Tag by parsing the string representation, e.g. "<html>".
1050      *
1051      * The string is passed by reference, and is advanced over all characters
1052      * consumed.
1053      *
1054      * The second parameter is a dummy parameter only, required solely to
1055      * distinguish this constructor from the public one.
1056      */
1057     private this(ref string s, bool dummy) @safe pure
1058     {
1059         import std.algorithm.searching : countUntil;
1060         import std.ascii : isWhite;
1061         import std.utf : byCodeUnit;
1062
1063         tagString = s;
1064         try
1065         {
1066             reqc(s,'<');
1067             if (optc(s,'/')) type = TagType.END;
1068             ptrdiff_t i = s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f");
1069             name = s[0 .. i];
1070             s = s[i .. $];
1071
1072             i = s.byCodeUnit.countUntil!(a => !isWhite(a));
1073             s = s[i .. $];
1074
1075             while (s.length > 0 && s[0] != '>' && s[0] != '/')
1076             {
1077                 i = s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f");
1078                 string key = s[0 .. i];
1079                 s = s[i .. $];
1080
1081                 i = s.byCodeUnit.countUntil!(a => !isWhite(a));
1082                 s = s[i .. $];
1083                 reqc(s,'=');
1084                 i = s.byCodeUnit.countUntil!(a => !isWhite(a));
1085                 s = s[i .. $];
1086
1087                 immutable char quote = requireOneOf(s,"'\"");
1088                 i = s.byCodeUnit.countUntil(quote);
1089                 string val = decode(s[0 .. i], DecodeMode.LOOSE);
1090                 s = s[i .. $];
1091                 reqc(s,quote);
1092
1093                 i = s.byCodeUnit.countUntil!(a => !isWhite(a));
1094                 s = s[i .. $];
1095                 attr[key] = val;
1096             }
1097             if (optc(s,'/'))
1098             {
1099                 if (type == TagType.END) throw new TagException("");
1100                 type = TagType.EMPTY;
1101             }
1102             reqc(s,'>');
1103             tagString.length = tagString.length - s.length;
1104         }
1105         catch (XMLException e)
1106         {
1107             tagString.length = tagString.length - s.length;
1108             throw new TagException(tagString);
1109         }
1110     }
1111
1112     const
1113     {
1114         /**
1115          * Compares two Tags for equality
1116          *
1117          * You should rarely need to call this function. It exists so that Tags
1118          * can be used as associative array keys.
1119          *
1120          * Example:
1121          * --------------
1122          * Tag tag1,tag2
1123          * if (tag1 == tag2) { }
1124          * --------------
1125          */
1126         override bool opEquals(scope Object o)
1127         {
1128             const tag = toType!(const Tag)(o);
1129             return
1130                 (name != tag.name) ? false : (
1131                 (attr != tag.attr) ? false : (
1132                 (type != tag.type) ? false : (
1133             true )));
1134         }
1135
1136         /**
1137          * Compares two Tags
1138          *
1139          * Example:
1140          * --------------
1141          * Tag tag1,tag2
1142          * if (tag1 < tag2) { }
1143          * --------------
1144          */
1145         override int opCmp(Object o)
1146         {
1147             const tag = toType!(const Tag)(o);
1148             // Note that attr is an AA, so the comparison is nonsensical (bug 10381)
1149             return
1150                 ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) :
1151                 ((attr != tag.attr) ? ( cast(void *) attr < cast(void*) tag.attr ? -1 : 1 ) :
1152                 ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) :
1153             0 )));
1154         }
1155
1156         /**
1157          * Returns the hash of a Tag
1158          *
1159          * You should rarely need to call this function. It exists so that Tags
1160          * can be used as associative array keys.
1161          */
1162         override size_t toHash()
1163         {
1164             return typeid(name).getHash(&name);
1165         }
1166
1167         /**
1168          * Returns the string representation of a Tag
1169          *
1170          * Example:
1171          * --------------
1172          * auto tag = new Tag("book",TagType.START);
1173          * writefln(tag.toString()); // writes "<book>"
1174          * --------------
1175          */
1176         override string toString() @safe
1177         {
1178             if (isEmpty) return toEmptyString();
1179             return (isEnd) ? toEndString() : toStartString();
1180         }
1181
1182         private
1183         {
1184             string toNonEndString() @safe
1185             {
1186                 import std.format : format;
1187
1188                 string s = "<" ~ name;
1189                 foreach (key,val;attr)
1190                     s ~= format(" %s=\"%s\"",key,encode(val));
1191                 return s;
1192             }
1193
1194             string toStartString() @safe { return toNonEndString() ~ ">"; }
1195
1196             string toEndString() @safe { return "</" ~ name ~ ">"; }
1197
1198             string toEmptyString() @safe { return toNonEndString() ~ " />"; }
1199         }
1200
1201         /**
1202          * Returns true if the Tag is a start tag
1203          *
1204          * Example:
1205          * --------------
1206          * if (tag.isStart) { }
1207          * --------------
1208          */
1209         @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }
1210
1211         /**
1212          * Returns true if the Tag is an end tag
1213          *
1214          * Example:
1215          * --------------
1216          * if (tag.isEnd) { }
1217          * --------------
1218          */
1219         @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END;   }
1220
1221         /**
1222          * Returns true if the Tag is an empty tag
1223          *
1224          * Example:
1225          * --------------
1226          * if (tag.isEmpty) { }
1227          * --------------
1228          */
1229         @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
1230     }
1231 }
1232
/**
 * Class representing a comment
 */
class Comment : Item
{
    private string content;

    /**
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /**
     * Compares two comments for equality
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this comment's
     * body sorts before, equal to or after the other one's. An Item that is
     * not a Comment compares as 0.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;

        // Decide the type-mismatch case on its own. Folding it into the
        // result with && would turn the whole expression into a bool, so -1
        // would come out as 1 and "less than" could never be reported.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this comment
     */
    override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1315
@safe unittest // issue 16241
{
    import std.exception : assertThrown;

    // A body without "--" is accepted and stored unchanged.
    auto accepted = new Comment("==");
    assert(accepted.content == "==");

    // A body containing "--" is rejected.
    assertThrown!CommentException(new Comment("--"));
}
1323
/**
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two CDatas
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this section's
     * body sorts before, equal to or after the other one's. An Item that is
     * not a CData compares as 0.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;

        // Decide the type-mismatch case on its own. Folding it into the
        // result with && would turn the whole expression into a bool, so -1
        // would come out as 1 and "less than" could never be reported.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this CData section
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1403
/**
 * Class representing a text (aka Parsed Character Data) section
 */
class Text : Item
{
    private string content;     // stored already encoded

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        this.content = encode(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this section's
     * encoded text sorts before, equal to or after the other one's. An Item
     * that is not a Text compares as 0.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;

        // Decide the type-mismatch case on its own. Folding it into the
        // result with && would turn the whole expression into a bool, so -1
        // would come out as 1 and "less than" could never be reported.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this Text section
     */
    override string toString() scope const @safe @nogc pure nothrow { return content; }

    /**
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
}
1483
/**
 * Class representing an XML Instruction section
 */
class XMLInstruction : Item
{
    private string content;

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this
     * instruction's body sorts before, equal to or after the other one's. An
     * Item that is not an XMLInstruction compares as 0.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;

        // Decide the type-mismatch case on its own. Folding it into the
        // result with && would turn the whole expression into a bool, so -1
        // would come out as 1 and "less than" could never be reported.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this XmlInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
1563
/**
 * Class representing a Processing Instruction section
 */
class ProcessingInstruction : Item
{
    private string content;

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;
        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this
     * instruction's body sorts before, equal to or after the other one's. An
     * Item that is not a ProcessingInstruction compares as 0.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;

        // Decide the type-mismatch case on its own. Folding it into the
        // result with && would turn the whole expression into a bool, so -1
        // would come out as 1 and "less than" could never be reported.
        if (t is null) return 0;
        if (content == t.content) return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
}
1643
/**
 * Abstract base class for XML items
 */
abstract class Item
{
    /// Equality test against another Item of the same type
    abstract override bool opEquals(scope const Object o) @safe const;

    /// Ordering comparison against another Item of the same type
    abstract override int opCmp(scope const Object o) @safe const;

    /// Hash value of this item
    abstract override size_t toHash() @safe scope const;

    /// String representation of this item
    abstract override string toString() @safe scope const;

    /**
     * Returns an indented string representation of this item
     *
     * This default implementation yields the stripped toString() text as a
     * single line, or no lines at all when nothing is left after stripping.
     * It does not use indent.
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;

        string trimmed = strip(toString());
        if (trimmed.length == 0)
            return [];
        return [ trimmed ];
    }

    /// True when the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}
1677
/**
 * Class for parsing an XML Document.
 *
 * This is a subclass of ElementParser. Most of the useful functions are
 * documented there.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Bugs:
 *      Currently only supports UTF documents.
 *
 *      If there is an encoding attribute in the prolog, it is ignored.
 *
 */
class DocumentParser : ElementParser
{
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     *
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);

        // Run the well-formedness check; on failure the assertion message
        // carries the checker's explanation.
        try { check(xmlText_); }
        catch (CheckException e) { assert(false, "\n" ~ e.toString()); }
    }
    do
    {
        // s has to point at the stored text before super() runs.
        xmlText = xmlText_;
        s = &xmlText;
        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}
1729
@system unittest
{
    // The root holds exactly one child element, <child>, and no other items.
    auto tree = new Document("<root><child><grandchild/></child></root>");
    auto children = tree.elements;

    assert(children.length == 1);
    assert(children[0].tag.name == "child");
    assert(tree.items == children);
}
1737
1738 /**
1739  * Class for parsing an XML element.
1740  *
1741  * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1742  *
1743  * Note that you cannot construct instances of this class directly. You can
1744  * construct a DocumentParser (which is a subclass of ElementParser), but
1745  * otherwise, Instances of ElementParser will be created for you by the
1746  * library, and passed your way via onStartTag handlers.
1747  *
1748  */
1749 class ElementParser
1750 {
1751     alias Handler = void delegate(string);
1752     alias ElementHandler = void delegate(in Element element);
1753     alias ParserHandler = void delegate(ElementParser parser);
1754
1755     private
1756     {
1757         Tag tag_;
1758         string elementStart;
1759         string* s;
1760
1761         Handler commentHandler = null;
1762         Handler cdataHandler = null;
1763         Handler xiHandler = null;
1764         Handler piHandler = null;
1765         Handler rawTextHandler = null;
1766         Handler textHandler = null;
1767
1768         // Private constructor for start tags
1769         this(ElementParser parent) @safe @nogc pure nothrow
1770         {
1771             s = parent.s;
1772             this();
1773             tag_ = parent.tag_;
1774         }
1775
1776         // Private constructor for empty tags
1777         this(Tag tag, string* t) @safe @nogc pure nothrow
1778         {
1779             s = t;
1780             this();
1781             tag_ = tag;
1782         }
1783     }
1784
1785     /**
1786      * The Tag at the start of the element being parsed. You can read this to
1787      * determine the tag's name and attributes.
1788      */
1789     @property @safe @nogc pure nothrow const(Tag) tag() const { return tag_; }
1790
1791     /**
1792      * Register a handler which will be called whenever a start tag is
1793      * encountered which matches the specified name. You can also pass null as
1794      * the name, in which case the handler will be called for any unmatched
1795      * start tag.
1796      *
1797      * Example:
1798      * --------------
1799      * // Call this function whenever a <podcast> start tag is encountered
1800      * onStartTag["podcast"] = (ElementParser xml)
1801      * {
1802      *     // Your code here
1803      *     //
1804      *     // This is a a closure, so code here may reference
1805      *     // variables which are outside of this scope
1806      * };
1807      *
1808      * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
1809      * // start tag is encountered
1810      * onStartTag["episode"] = &myEpisodeStartHandler;
1811      *
1812      * // call delegate dg for all other start tags
1813      * onStartTag[null] = dg;
1814      * --------------
1815      *
1816      * This library will supply your function with a new instance of
1817      * ElementHandler, which may be used to parse inside the element whose
1818      * start tag was just found, or to identify the tag attributes of the
1819      * element, etc.
1820      *
1821      * Note that your function will be called for both start tags and empty
1822      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1823      * and &lt;br/&gt;.
1824      */
1825     ParserHandler[string] onStartTag;
1826
1827     /**
1828      * Register a handler which will be called whenever an end tag is
1829      * encountered which matches the specified name. You can also pass null as
1830      * the name, in which case the handler will be called for any unmatched
1831      * end tag.
1832      *
1833      * Example:
1834      * --------------
1835      * // Call this function whenever a </podcast> end tag is encountered
1836      * onEndTag["podcast"] = (in Element e)
1837      * {
1838      *     // Your code here
1839      *     //
1840      *     // This is a a closure, so code here may reference
1841      *     // variables which are outside of this scope
1842      * };
1843      *
1844      * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
1845      * // end tag is encountered
1846      * onEndTag["episode"] = &myEpisodeEndHandler;
1847      *
1848      * // call delegate dg for all other end tags
1849      * onEndTag[null] = dg;
1850      * --------------
1851      *
1852      * Note that your function will be called for both start tags and empty
1853      * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1854      * and &lt;br/&gt;.
1855      */
1856     ElementHandler[string] onEndTag;
1857
1858     protected this() @safe @nogc pure nothrow
1859     {
1860         elementStart = *s;
1861     }
1862
1863     /**
1864      * Register a handler which will be called whenever text is encountered.
1865      *
1866      * Example:
1867      * --------------
1868      * // Call this function whenever text is encountered
1869      * onText = (string s)
1870      * {
1871      *     // Your code here
1872      *
1873      *     // The passed parameter s will have been decoded by the time you see
1874      *     // it, and so may contain any character.
1875      *     //
1876      *     // This is a a closure, so code here may reference
1877      *     // variables which are outside of this scope
1878      * };
1879      * --------------
1880      */
1881     @property @safe @nogc pure nothrow void onText(Handler handler) { textHandler = handler; }
1882
1883     /**
1884      * Register an alternative handler which will be called whenever text
1885      * is encountered. This differs from onText in that onText will decode
1886      * the text, whereas onTextRaw will not. This allows you to make design
1887      * choices, since onText will be more accurate, but slower, while
1888      * onTextRaw will be faster, but less accurate. Of course, you can
1889      * still call decode() within your handler, if you want, but you'd
1890      * probably want to use onTextRaw only in circumstances where you
1891      * know that decoding is unnecessary.
1892      *
1893      * Example:
1894      * --------------
1895      * // Call this function whenever text is encountered
1896      * onText = (string s)
1897      * {
1898      *     // Your code here
1899      *
1900      *     // The passed parameter s will NOT have been decoded.
1901      *     //
1902      *     // This is a a closure, so code here may reference
1903      *     // variables which are outside of this scope
1904      * };
1905      * --------------
1906      */
1907     @safe @nogc pure nothrow void onTextRaw(Handler handler) { rawTextHandler = handler; }
1908
1909     /**
1910      * Installs the callback invoked when a CDATA section is encountered.
1911      *
1912      * Example:
1913      * --------------
1914      * onCData = (string s)
1915      * {
1916      *     // Your code here
1917      *
1918      *     // s holds only the section's content; the opening <![CDATA[ and
1919      *     // the closing ]]> have been removed.
1920      *     //
1921      *     // This is a closure, so it may use outside variables.
1922      * };
1923      * --------------
1924      */
1925     @property void onCData(Handler handler) @safe @nogc pure nothrow
1926     {
1927         cdataHandler = handler;
1928     }
1929
1930     /**
1931      * Installs the callback invoked when a comment is encountered.
1932      *
1933      * Example:
1934      * --------------
1935      * onComment = (string s)
1936      * {
1937      *     // Your code here
1938      *
1939      *     // s holds only the comment's content; the opening <!-- and the
1940      *     // closing --> have been removed.
1941      *     //
1942      *     // This is a closure, so it may use outside variables.
1943      * };
1944      * --------------
1945      */
1946     @property void onComment(Handler handler) @safe @nogc pure nothrow
1947     {
1948         commentHandler = handler;
1949     }
1950
1951     /**
1952      * Installs the callback invoked when a processing instruction is found.
1953      *
1954      * Example:
1955      * --------------
1956      * onPI = (string s)
1957      * {
1958      *     // Your code here
1959      *
1960      *     // s holds only the instruction's content; the opening <? and
1961      *     // the closing ?> have been removed.
1962      *     //
1963      *     // This is a closure, so it may use outside variables.
1964      * };
1965      * --------------
1966      */
1967     @property void onPI(Handler handler) @safe @nogc pure nothrow
1968     {
1969         piHandler = handler;
1970     }
1971
1972     /**
1973      * Installs the callback invoked when an XML instruction (markup that
1974      * starts with <! but is neither a comment nor a CDATA section) is
1975      * encountered.
1976      *
1977      * Example:
1978      * --------------
1979      * onXI = (string s)
1980      * {
1981      *     // Your code here
1982      *
1983      *     // s holds only the instruction's content; the opening <! and
1984      *     // the closing > have been removed.
1985      *     //
1986      *     // This is a closure, so it may use outside variables.
1987      * };
1988      * --------------
1989      */
1990     @property void onXI(Handler handler) @safe @nogc pure nothrow
1991     {
1992         xiHandler = handler;
1993     }
1994
1995     /**
1996      * Parse an XML element.
1997      *
1998      * Parsing continues until the end of the current element, invoking the
1999      * registered handler for each item encountered. A tag with no entry of
2000      * its own in onStartTag/onEndTag falls back to the null-key handler.
2001      *
2002      * Throws: various kinds of XMLException
2003      */
2004     void parse()
2005     {
2006         import std.algorithm.searching : startsWith;
2007         import std.string : indexOf;
2008
2009         string t;
2010         const Tag root = tag_; // tag held on entry (may be null)
2011         Tag[string] startTags; // latest start tag seen, keyed by element name
2012         if (tag_ !is null) startTags[tag_.name] = tag_;
2013
2014         while (s.length != 0) // s points at the text that is still unparsed
2015         {
2016             if (startsWith(*s,"<!--")) // comment
2017             {
2018                 chop(*s,4);
2019                 t = chop(*s,indexOf(*s,"-->"));
2020                 if (commentHandler.funcptr !is null) commentHandler(t);
2021                 chop(*s,3);
2022             }
2023             else if (startsWith(*s,"<![CDATA[")) // CDATA section
2024             {
2025                 chop(*s,9);
2026                 t = chop(*s,indexOf(*s,"]]>"));
2027                 if (cdataHandler.funcptr !is null) cdataHandler(t);
2028                 chop(*s,3);
2029             }
2030             else if (startsWith(*s,"<!")) // any other <! ... > instruction
2031             {
2032                 chop(*s,2);
2033                 t = chop(*s,indexOf(*s,">"));
2034                 if (xiHandler.funcptr !is null) xiHandler(t);
2035                 chop(*s,1);
2036             }
2037             else if (startsWith(*s,"<?")) // processing instruction
2038             {
2039                 chop(*s,2);
2040                 t = chop(*s,indexOf(*s,"?>"));
2041                 if (piHandler.funcptr !is null) piHandler(t);
2042                 chop(*s,2);
2043             }
2044             else if (startsWith(*s,"<")) // start, end or empty-element tag
2045             {
2046                 tag_ = new Tag(*s,true);
2047                 if (root is null)
2048                     return; // Return to constructor of derived class
2049
2050                 if (tag_.isStart)
2051                 {
2052                     startTags[tag_.name] = tag_;
2053
2054                     auto parser = new ElementParser(this); // parser handed to the handler
2055
2056                     auto handler = tag_.name in onStartTag;
2057                     if (handler !is null) (*handler)(parser);
2058                     else
2059                     {
2060                         handler = null in onStartTag; // fall back to the null-key handler
2061                         if (handler !is null) (*handler)(parser);
2062                     }
2063                 }
2064                 else if (tag_.isEnd)
2065                 {
2066                     const startTag = startTags[tag_.name]; // the recorded start tag of that name
2067                     string text;
2068
2069                     if (startTag.tagString.length == 0)
2070                         assert(0);
2071
2072                     immutable(char)* p = startTag.tagString.ptr
2073                         + startTag.tagString.length; // just past the start tag
2074                     immutable(char)* q = &tag_.tagString[0]; // first character of the end tag
2075                     text = decode(p[0..(q-p)], DecodeMode.LOOSE); // everything between the two tags
2076
2077                     auto element = new Element(startTag);
2078                     if (text.length != 0) element ~= new Text(text);
2079
2080                     auto handler = tag_.name in onEndTag;
2081                     if (handler !is null) (*handler)(element);
2082                     else
2083                     {
2084                         handler = null in onEndTag; // fall back to the null-key handler
2085                         if (handler !is null) (*handler)(element);
2086                     }
2087
2088                     if (tag_.name == root.name) return; // end tag carrying the root's name: stop
2089                 }
2090                 else if (tag_.isEmpty)
2091                 {
2092                     Tag startTag = new Tag(tag_.name);
2093
2094                     // FIX by hed010gy, for bug 2979
2095                     // http://d.puremagic.com/issues/show_bug.cgi?id=2979
2096                     if (tag_.attr.length > 0)
2097                           foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
2098                     // END FIX
2099
2100                     // Handle the pretend start tag
2101                     string s2; // empty input for the child parser
2102                     auto parser = new ElementParser(startTag,&s2);
2103                     auto handler1 = startTag.name in onStartTag;
2104                     if (handler1 !is null) (*handler1)(parser);
2105                     else
2106                     {
2107                         handler1 = null in onStartTag;
2108                         if (handler1 !is null) (*handler1)(parser);
2109                     }
2110
2111                     // Handle the pretend end tag
2112                     auto element = new Element(startTag);
2113                     auto handler2 = tag_.name in onEndTag;
2114                     if (handler2 !is null) (*handler2)(element);
2115                     else
2116                     {
2117                         handler2 = null in onEndTag;
2118                         if (handler2 !is null) (*handler2)(element);
2119                     }
2120                 }
2121             }
2122             else // plain text up to the next '<'
2123             {
2124                 t = chop(*s,indexOf(*s,"<"));
2125                 if (rawTextHandler.funcptr !is null)
2126                     rawTextHandler(t); // the raw handler takes precedence and gets undecoded text
2127                 else if (textHandler.funcptr !is null)
2128                     textHandler(decode(t,DecodeMode.LOOSE));
2129             }
2130         }
2131     }
2132
2133     /**
2134      * Yields the leading portion of the element's text that has been consumed so far
2135      */
2136     override string toString() const @nogc @safe pure nothrow
2137     {
2138         assert(s.length <= elementStart.length);
2139         return elementStart[0 .. $ - s.length];
2140     }
2141
2142 }
2143
2144 private
2145 {
2146     template Check(string msg)
2147     {
2148         // Rewind point: the input as it stood when the mixin was entered
2149         string old = s;
2150         // Rewinds s and throws an Err carrying msg
2151         void fail() @safe pure
2152         {
2153             s = old;
2154             throw new Err(old,msg);
2155         }
2156
2157         // Rewinds s and throws an Err carrying msg, chained to e
2158         void fail(Err e) @safe pure
2159         {
2160             s = old;
2161             throw new Err(old,msg,e);
2162         }
2163
2164         // Wraps msg2, located at the current position, in a msg failure
2165         void fail(string msg2) @safe pure { fail(new Err(s,msg2)); }
2166     }
2167
2168     void checkMisc(ref string s) @safe pure // rule 27
2169     {
2170         import std.algorithm.searching : startsWith;
2171
2172         mixin Check!("Misc");
2173         // Misc ::= Comment | PI | S -- pick the alternative by its opener
2174         try
2175         {
2176             if (s.startsWith("<!--")) checkComment(s);
2177             else if (s.startsWith("<?")) checkPI(s);
2178             else checkSpace(s);
2179         }
2180         catch (Err cause) { fail(cause); }
2181     }
2182
2183     // Rule 1: document ::= prolog element Misc*
2184     void checkDocument(ref string s) @safe pure
2185     {
2186         mixin Check!("Document");
2187         try
2188         {
2189             seq!(checkProlog,checkElement)(s);
2190             star!checkMisc(s);
2191         }
2192         catch (Err cause) { fail(cause); }
2193     }
2194
2195     void checkChars(ref string s) @safe pure // rule 2
2196     {
          // Fails on the first character of s that isChar() rejects; s itself is
          // left unchanged when every character is acceptable.
          //
2197         // TO DO - Fix std.utf stride and decode functions, then use those
2198         // instead
2199         import std.format : format;
2200
2201         mixin Check!("Chars");
2202
2203         dchar c;
2204         ptrdiff_t n = -1;
          // The index must be size_t: an int index cannot represent every
          // offset of a large input.
2205         foreach (size_t i,dchar d; s)
2206         {
2207             if (!isChar(d))
2208             {
2209                 c = d;
2210                 n = i;
2211                 break;
2212             }
2213         }
2214         if (n != -1)
2215         {
2216             s = s[n..$]; // position the error at the offending character
2217             fail(format("invalid character: U+%04X",c));
2218         }
2219     }
2220
2221     // Rule 3: S -- one or more whitespace characters.
2222     void checkSpace(ref string s) @safe pure
2223     {
2224         import std.ascii : isWhite;
2225
2226         mixin Check!("Whitespace");
2227
2228         // Count the leading run of whitespace code units
2229         size_t run = 0;
2230         while (run < s.length && isWhite(s[run])) ++run;
2231         s = s[run .. $];
2232         // Nothing consumed means there was no whitespace at all
2233         if (s is old) fail();
2234     }
2235
2236     void checkName(ref string s, out string name) @safe pure // rule 5
2237     {
          // Splits a leading Name off s: name receives it and s is advanced
          // past it. Fails if s is empty or does not begin with a name-start
          // character.
2238         mixin Check!("Name");
2239
2240         if (s.length == 0) fail();
          // Default to the whole input: when every character belongs to the
          // name the loop below never reaches its break.
2241         size_t n = s.length;
          // size_t index: an int index cannot represent every offset
2242         foreach (size_t i,dchar c;s)
2243         {
2244             if (c == '_' || c == ':' || isLetter(c)) continue;
2245             if (i == 0) fail();
2246             if (c == '-' || c == '.' || isDigit(c)
2247                 || isCombiningChar(c) || isExtender(c)) continue;
2248             n = i;
2249             break;
2250         }
2251         name = s[0 .. n];
2252         s = s[n..$];
2253     }
2254
2255     void checkAttValue(ref string s) @safe pure // rule 10
2256     {
          // Consumes a quoted attribute value: the opening quote, text and
          // references, and the matching closing quote.
2260         mixin Check!("AttValue");
2261
2262         if (s.length == 0) fail();
2263         char c = s[0];
2264         if (c != '\u0022' && c != '\u0027')
2265             fail("attribute value requires quotes");
2266         s = s[1..$];
2267         for (;;)
2268         {
              // Stop at the closing quote, at '<' (illegal here) or at '&' (a
              // reference to validate). Searching for the quote alone would
              // skip both checks and slice with -1 when the quote is missing.
              size_t i = 0;
              while (i < s.length && s[i] != c && s[i] != '<' && s[i] != '&')
                  ++i;
2269             s = s[i .. $];
2270             if (s.length == 0) fail("unterminated attribute value");
2271             if (s[0] == '<') fail("< found in attribute value");
2272             if (s[0] == c) break;
2273             try { checkReference(s); } catch (Err e) { fail(e); }
2274         }
2275         s = s[1..$]; // step over the closing quote
2276     }
2277
2278     // Rule 14: CharData -- text up to the next '&' or '<'; "]]>" is illegal.
2279     void checkCharData(ref string s) @safe pure
2280     {
2281         import std.algorithm.searching : startsWith;
2282
2283         mixin Check!("CharData");
2284
2285         // Advance one code unit at a time until markup or a reference starts
2286         for (; s.length != 0; s = s[1..$])
2287         {
2288             if (s.startsWith("&") || s.startsWith("<")) break;
2289             if (s.startsWith("]]>")) fail("]]> found within char data");
2290         }
2291     }
2292
2293     // Rule 15: Comment -- "<!--" ... "-->", with no "--" inside the text.
2294     void checkComment(ref string s) @safe pure
2295     {
2296         import std.string : indexOf;
2297         mixin Check!("Comment");
2298         try { checkLiteral("<!--",s); } catch (Err cause) { fail(cause); }
2299         // The first "--" after the opener must begin the closing "-->"
2300         immutable dashes = s.indexOf("--");
2301         if (dashes < 0) fail("unterminated comment");
2302         s = s[dashes .. $];
2303         try { checkLiteral("-->",s); } catch (Err cause) { fail(cause); }
2304     }
2305
2306     // Rule 16: PI -- "<?" ... "?>"; the content in between is not inspected.
2307     void checkPI(ref string s) @safe pure
2308     {
2309         mixin Check!("PI");
2310         try
2311         {
2312             checkLiteral("<?",s);
2313             checkEnd("?>",s);
2314         }
2315         catch (Err cause) { fail(cause); }
2316     }
2317
2318     // Rule 18: CDSect -- the cdata opener, then everything up to "]]>".
2319     void checkCDSect(ref string s) @safe pure
2320     {
2321         mixin Check!("CDSect");
2322         try
2323         {
2324             checkLiteral(cdata,s);
2325             checkEnd("]]>",s);
2326         }
2327         catch (Err cause) { fail(cause); }
2328     }
2329
2330     // Rule 22: prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
2331     void checkProlog(ref string s) @safe pure
2332     {
2333         mixin Check!("Prolog");
2334
2335         try
2336         {
2337             // The XML declaration is optional, see
2338             // http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
2339             opt!checkXMLDecl(s);
2340
2341             star!checkMisc(s);
2342             opt!(seq!(checkDocTypeDecl,star!checkMisc))(s);
2343         }
2344         catch (Err cause) { fail(cause); }
2345     }
2346
2347     // Rule 23: XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
2348     void checkXMLDecl(ref string s) @safe pure
2349     {
2350         mixin Check!("XMLDecl");
2351         try
2352         {
2353             checkLiteral("<?xml",s);
2354             checkVersionInfo(s);
2355             opt!checkEncodingDecl(s);
2356             opt!checkSDDecl(s);
2357             opt!checkSpace(s);
2358             checkLiteral("?>",s);
2359         }
2360         catch (Err cause) { fail(cause); }
2361     }
2362
2363     // Rule 24: VersionInfo ::= S 'version' Eq, then a quoted VersionNum
2364     void checkVersionInfo(ref string s) @safe pure
2365     {
2366         mixin Check!("VersionInfo");
2367
2368         try
2369         {
2370             checkSpace(s);
2371             checkLiteral("version",s);
2372             checkEq(s);
2373             quoted!checkVersionNum(s);
2374         }
2375         catch (Err cause) { fail(cause); }
          }
2376
2377     // Rule 25: Eq ::= S? '=' S?
2378     void checkEq(ref string s) @safe pure
2379     {
2380         mixin Check!("Eq");
2381
2382         try
2383         {
2384             opt!checkSpace(s);
2385             checkLiteral("=",s);
2386             opt!checkSpace(s);
2387         }
2388         catch (Err cause) { fail(cause); }
          }
2389
2390     void checkVersionNum(ref string s) @safe pure // rule 26
2391     {
2392         import std.ascii : isAlphaNum;
2394
2395         mixin Check!("VersionNum");
2396
          // Consume the run of characters a version number may contain,
          // [a-zA-Z0-9_.:-]+. Scanning ahead for a double quote instead would
          // reject single-quoted values and slice with -1 when none follows.
          size_t n = 0;
          while (n < s.length && (isAlphaNum(s[n]) || s[n] == '_'
              || s[n] == '.' || s[n] == ':' || s[n] == '-')) ++n;
2397         s = s[n .. $];
2398         if (s is old) fail(); // an empty version number is not allowed
2399     }
2400
2401     // Rule 28: doctypedecl -- "<!DOCTYPE" followed by anything up to ">".
2402     void checkDocTypeDecl(ref string s) @safe pure
2403     {
2404         mixin Check!("DocTypeDecl");
2405
2406         try
2407         {
2408             checkLiteral("<!DOCTYPE",s);
2409             //
2410             // TO DO -- ensure DOCTYPE is well formed
2411             // (But not yet. That's one of our "future directions")
2412             checkEnd(">",s);
2413         }
2414         catch (Err cause) { fail(cause); }
2415     }
2416
2417     // Rule 32: SDDecl ::= S 'standalone' Eq, then a quoted yes or no
2418     void checkSDDecl(ref string s) @safe pure
2419     {
2420         import std.algorithm.searching : startsWith;
2421         mixin Check!("SDDecl");
2422         try
2423         {
2424             checkSpace(s);
2425             checkLiteral("standalone",s);
2426             checkEq(s);
2427         }
2428         catch (Err cause) { fail(cause); }
2429
2430         // Length of the quoted value, quotes included
2431         size_t width;
2432         if (s.startsWith("'yes'", "\"yes\"")) width = 5;
2433         else if (s.startsWith("'no'", "\"no\"")) width = 4;
2434         else fail("standalone attribute value must be 'yes', \"yes\","~
2435             " 'no' or \"no\"");
2436         s = s[width..$];
2437     }
2438
2439     // Rule 39: element ::= EmptyElemTag | STag content ETag
2440     void checkElement(ref string s) @safe pure
2441     {
2442         mixin Check!("Element");
2443
2444         string kind, startName;
2445         try { checkTag(s,kind,startName); } catch (Err cause) { fail(cause); }
2446         // Anything other than a start tag needs neither content nor end tag
2447         if (kind != "STag") return;
2448
2449         string endName, atEndTag;
2450         try
2451         {
2452             checkContent(s);
2453             atEndTag = s;
2454             checkETag(s,endName);
2455         }
2456         catch (Err cause) { fail(cause); }
2457         if (startName == endName) return;
2458
2459         // Report the mismatch at the position of the end tag
2460         s = atEndTag;
2461         fail("end tag name \"" ~ endName
2462             ~ "\" differs from start tag name \""~startName~"\"");
2463     }
2464
2465     // Rules 40 and 44: start tag (type "STag") or empty-element tag ("ETag")
2466     void checkTag(ref string s, out string type, out string name) @safe pure
2467     {
2468         mixin Check!("Tag");
2469         try
2470         {
2471             type = "STag";
2472             checkLiteral("<",s);
2473             checkName(s,name);
2474             star!(seq!(checkSpace,checkAttribute))(s);
2475             opt!checkSpace(s);
2476             // A '/' just before the closing '>' marks an empty-element tag
2477             immutable selfClosing = s.length != 0 && s[0] == '/';
2478             if (selfClosing)
2479             {
2480                 type = "ETag";
2481                 s = s[1..$];
2482             }
2483             checkLiteral(">",s);
2484         }
2485         catch (Err cause) { fail(cause); }
          }
2486
2487     // Rule 41: Attribute ::= Name Eq AttValue
2488     void checkAttribute(ref string s) @safe pure
2489     {
2490         mixin Check!("Attribute");
2491         try
2492         {
2493             string ignored; // the attribute's name is checked but not needed
2494             checkName(s,ignored);
2495             checkEq(s);
2496             checkAttValue(s);
2497         }
2498         catch (Err cause) { fail(cause); }
2499     }
2500
2501     // Rule 42: ETag ::= '</' Name S? '>'; name receives the tag's name
2502     void checkETag(ref string s, out string name) @safe pure
2503     {
2504         mixin Check!("ETag");
2505         try
2506         {
2507             checkLiteral("</",s);
2508             checkName(s,name);
2509             opt!checkSpace(s);
2510             checkLiteral(">",s);
2511         }
2512         catch (Err cause) { fail(cause); }
2513     }
2514
2515     // Rule 43: content -- everything inside an element, up to the next "</"
2516     void checkContent(ref string s) @safe pure
2517     {
2518         import std.algorithm.searching : startsWith;
2519
2520         mixin Check!("Content");
2521         try
2522         {
2523             while (s.length != 0)
2524             {
2525                 // Move the rewind point up to the item about to be checked
2526                 old = s;
2527                 if (s.startsWith("</")) break;
2528                 if (s.startsWith("&")) checkReference(s);
2529                 else if (s.startsWith("<!--")) checkComment(s);
2530                 else if (s.startsWith("<?")) checkPI(s);
2531                 else if (s.startsWith(cdata)) checkCDSect(s);
2532                 else if (s.startsWith("<")) checkElement(s);
2533                 else checkCharData(s);
2534             }
2535         }
2536         catch (Err cause) { fail(cause); }
          }
2537
2538     void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
2539     {
          // Consumes a character reference ("&#" decimal digits ";" or "&#x"
          // hex digits ";") and stores the character it denotes in c.
2540         import std.format : format;
2541
2542         mixin Check!("CharRef");
2543
2544         c = 0;
2545         try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
2546         int radix = 10;
2547         if (s.length != 0 && s[0] == 'x')
2548         {
2549             s = s[1..$];
2550             radix = 16;
2551         }
2552         if (s.length == 0) fail("unterminated character reference");
2553         if (s[0] == ';')
2554             fail("character reference must have at least one digit");
2555         while (s.length != 0)
2556         {
2557             immutable char d = s[0];
2558             int n = 0;
              // Each case adds one and falls through to the next lower digit,
              // so n ends up holding the digit's value.
2559             switch (d)
2560             {
2561                 case 'F','f': ++n;      goto case;
2562                 case 'E','e': ++n;      goto case;
2563                 case 'D','d': ++n;      goto case;
2564                 case 'C','c': ++n;      goto case;
2565                 case 'B','b': ++n;      goto case;
2566                 case 'A','a': ++n;      goto case;
2567                 case '9':     ++n;      goto case;
2568                 case '8':     ++n;      goto case;
2569                 case '7':     ++n;      goto case;
2570                 case '6':     ++n;      goto case;
2571                 case '5':     ++n;      goto case;
2572                 case '4':     ++n;      goto case;
2573                 case '3':     ++n;      goto case;
2574                 case '2':     ++n;      goto case;
2575                 case '1':     ++n;      goto case;
2576                 case '0':     break;
2577                 default: n = 100; break; // not a digit in any radix
2578             }
2579             if (n >= radix) break;
2580             c *= radix;
2581             c += n;
              // Reject out-of-range values at once: letting the accumulator
              // grow further could wrap it round to a legal-looking character.
              if (c > 0x10FFFF)
                  fail(format("U+%04X is not a legal character",c));
2582             s = s[1..$];
2583         }
2584         if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
2585         if (s.length == 0 || s[0] != ';') fail("expected ;");
2586         else s = s[1..$];
2587     }
2588
2589     // Rule 67: Reference ::= EntityRef | CharRef
2590     void checkReference(ref string s) @safe pure
2591     {
2592         import std.algorithm.searching : startsWith;
2593
2594         mixin Check!("Reference");
2595         try
2596         {
2597             dchar decoded; // required by checkCharRef, not used here
2598             if (s.startsWith("&#")) checkCharRef(s,decoded);
2599             else checkEntityRef(s);
2600         }
2601         catch (Err cause) { fail(cause); }
2602     }
2603
2604     // Rule 68: EntityRef ::= '&' Name ';'
2605     void checkEntityRef(ref string s) @safe pure
2606     {
2607         mixin Check!("EntityRef");
2608         try
2609         {
2610             string entity; // the name is checked for form only
2611             checkLiteral("&",s);
2612             checkName(s,entity);
2613             checkLiteral(";",s);
2614         }
2615         catch (Err cause) { fail(cause); }
2616     }
2617
2618     void checkEncName(ref string s) @safe pure // rule 81
2619     {
2621         import std.ascii : isAlpha, isAlphaNum;
2623
2624         mixin Check!("EncName");
2625
          // EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
          // Both parts are scanned by hand: a search that can come back with
          // -1 must not be used as a slice bound, and the name must not run
          // on over arbitrary characters until some later quote.
          size_t n = 0;
          while (n < s.length && isAlpha(s[n])) ++n;
2626         s = s[n .. $];
2627         if (s is old) fail(); // the name has to start with a letter
          n = 0;
          while (n < s.length && (isAlphaNum(s[n]) || s[n] == '.'
              || s[n] == '_' || s[n] == '-')) ++n;
2628         s = s[n .. $];
2629     }
2630
2631     // Rule 80: EncodingDecl ::= S 'encoding' Eq, then a quoted EncName
2632     void checkEncodingDecl(ref string s) @safe pure
2633     {
2634         mixin Check!("EncodingDecl");
2635         try
2636         {
2637             checkSpace(s);
2638             checkLiteral("encoding",s);
2639             checkEq(s);
2640             quoted!checkEncName(s);
2641         }
2642         catch (Err cause) { fail(cause); }
2643     }
2644
2645     // Helper functions
2646
2647     // Consumes literal from the front of s; fails if s does not start with it
2648     void checkLiteral(string literal,ref string s) @safe pure
2649     {
2650         import std.string : startsWith;
2651         mixin Check!("Literal");
2652
2653         if (s.startsWith(literal)) s = s[literal.length..$];
2654         else fail("Expected literal \""~literal~"\"");
2655     }
2656
2657     // Skips everything up to and including the first occurrence of end
2658     void checkEnd(string end,ref string s) @safe pure
2659     {
2660         import std.string : indexOf;
2661         // Deliberately no mixin Check here.
2662         immutable at = s.indexOf(end);
2663         if (at < 0) throw new Err(s,"Unable to find terminating \""~end~"\"");
2664         s = s[at .. $];
2665         checkLiteral(end,s);
2666     }
2667
2668     // Metafunctions -- none of these use mixin Check
2669
2670     void opt(alias f)(ref string s) // f is optional: its failure is ignored
2671     {
2672         try { f(s); } catch (Err) {}
2673     }
2674
2675     void plus(alias f)(ref string s) // one or more applications of f
2676     {
2677         // The first application must succeed; star soaks up the rest
2678         seq!(f,star!f)(s);
2679     }
2680
2681     // f* -- zero or more applications of f
2682     void star(alias f)(ref string s)
2683     {
2684         // Runs until the input is exhausted; the first failure of f simply
2685         // ends the repetition.
2686         try { while (s.length != 0) f(s); }
2687         catch (Err) {}
2688     }
2689
2690     // Applies f to text enclosed in a matching pair of single or double quotes
2691     void quoted(alias f)(ref string s)
2692     {
2693         import std.string : startsWith;
2694
2695         // Anything other than an apostrophe is treated as the double-quoted
2696         // form, so a missing quote is reported as a missing '"'.
2697         immutable string mark = s.startsWith("'") ? "'" : "\"";
2698         checkLiteral(mark,s);
2699         f(s);
2700         checkLiteral(mark,s);
2701     }
2707
2708     // f g -- applies f, then g, to the same input
2709     void seq(alias f,alias g)(ref string s)
2710     {
2711         f(s); g(s);
2712     }
2713 }
2714
2715 /**
2716  * Check an entire XML document for well-formedness
2717  *
2718  * Params:
2719  *      s = the document to be checked, passed as a string
2720  *
2721  * Throws: CheckException if the document is not well formed
2722  *
2723  * CheckException's toString() method will yield the complete hierarchy of
2724  * parse failure (the XML equivalent of a stack trace), giving the line and
2725  * column number of every failure at every level.
2726  */
2727 void check(string s) @safe pure
2728 {
      // The checks below advance s, so keep the whole document: line and
      // column numbers have to be computed against it, not against whatever
      // is left when the "junk" error is raised.
      immutable entire = s;
2729     try
2730     {
2731         checkChars(s);
2732         checkDocument(s);
2733         if (s.length != 0) throw new Err(s,"Junk found after document");
2734     }
2735     catch (Err e)
2736     {
2737         e.complete(entire);
2738         throw e;
2739     }
2740 }
2741
2742 @system pure unittest // check() must reject the mismatched </genres> end tag and name both tags
2743 {
2744     import std.string : indexOf;
2745
2746     try
2747     {
2748         check(q"[<?xml version="1.0"?>
2749         <catalog>
2750            <book id="bk101">
2751               <author>Gambardella, Matthew</author>
2752               <title>XML Developer's Guide</title>
2753               <genre>Computer</genre>
2754               <price>44.95</price>
2755               <publish_date>2000-10-01</publish_date>
2756               <description>An in-depth look at creating applications
2757               with XML.</description>
2758            </book>
2759            <book id="bk102">
2760               <author>Ralls, Kim</author>
2761               <title>Midnight Rain</title>
2762               <genre>Fantasy</genres>
2763               <price>5.95</price>
2764               <publish_date>2000-12-16</publish_date>
2765               <description>A former architect battles corporate zombies,
2766               an evil sorceress, and her own childhood to become queen
2767               of the world.</description>
2768            </book>
2769            <book id="bk103">
2770               <author>Corets, Eva</author>
2771               <title>Maeve Ascendant</title>
2772               <genre>Fantasy</genre>
2773               <price>5.95</price>
2774               <publish_date>2000-11-17</publish_date>
2775               <description>After the collapse of a nanotechnology
2776               society in England, the young survivors lay the
2777               foundation for a new society.</description>
2778            </book>
2779         </catalog>
2780         ]");
2781         assert(false); // reaching this line means the bad document was accepted
2782     }
2783     catch (CheckException e)
2784     {
2785         auto n = e.toString().indexOf("end tag name \"genres\" differs"~
2786                                       " from start tag name \"genre\"");
2787         assert(n != -1);
2788     }
2789 }
2790
2791 @system unittest // a well-formed document containing a comment must pass check()
2792 {
2793     string s = q"EOS
2794 <?xml version="1.0"?>
2795 <set>
2796     <one>A</one>
2797     <!-- comment -->
2798     <two>B</two>
2799 </set>
2800 EOS";
2801     try
2802     {
2803         check(s);
2804     }
2805     catch (CheckException e)
2806     {
2807         assert(0, e.toString()); // show the failure hierarchy if the check fails
2808     }
2809 }
2810
2811 @system unittest // attribute values may contain the other kind of quote character
2812 {
2813     string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
2814                         xmlns:stream="http://etherx.'jabber'.org/streams"
2815                         xmlns="jabber:'client'" from='jid.pl' id="587a5767"
2816                         xml:lang="en" version="1.0" attr='a"b"c'>
2817                         </stream:stream></r>`;
2818
2819     DocumentParser parser = new DocumentParser(test_xml);
2820     bool tested = false; // set by the handler, proving that it actually ran
2821     parser.onStartTag["stream:stream"] = (ElementParser p) {
2822         assert(p.tag.attr["xmlns"] == "jabber:'client'");
2823         assert(p.tag.attr["from"] == "jid.pl");
2824         assert(p.tag.attr["attr"] == "a\"b\"c");
2825         tested = true;
2826     };
2827     parser.parse();
2828     assert(tested);
2829 }
2830
2831 @system unittest // entity references are decoded in attribute values and in element text
2832 {
2833     string s = q"EOS
2834 <?xml version="1.0" encoding="utf-8"?> <Tests>
2835     <Test thing="What &amp; Up">What &amp; Up Second</Test>
2836 </Tests>
2837 EOS";
2838     auto xml = new DocumentParser(s);
2839
2840     xml.onStartTag["Test"] = (ElementParser xml) {
2841         assert(xml.tag.attr["thing"] == "What & Up");
2842     };
2843
2844     xml.onEndTag["Test"] = (in Element e) {
2845         assert(e.text() == "What & Up Second");
2846     };
2847     xml.parse();
2848 }
2849
2850 @system unittest // a Document built from an empty-element tag must print back unchanged
2851 {
2852     string s = `<tag attr="&quot;value&gt;" />`;
2853     auto doc = new Document(s);
2854     assert(doc.toString() == s);
2855 }
2856
2857 /** The base class for exceptions thrown by this module */
2858 class XMLException : Exception { this(string msg) @safe pure { super(msg); } }
2859
2860 // Other exceptions
2861
2862 /// Thrown during Comment constructor
2863 class CommentException : XMLException
2864 { private this(string msg) @safe pure { super(msg); } }
2865
2866 /// Thrown during CData constructor
2867 class CDataException : XMLException
2868 { private this(string msg) @safe pure { super(msg); } }
2869
2870 /// Thrown during XMLInstruction constructor
2871 class XIException : XMLException
2872 { private this(string msg) @safe pure { super(msg); } }
2873
2874 /// Thrown during ProcessingInstruction constructor
2875 class PIException : XMLException
2876 { private this(string msg) @safe pure { super(msg); } }
2877
2878 /// Thrown during Text constructor
2879 class TextException : XMLException
2880 { private this(string msg) @safe pure { super(msg); } }
2881
2882 /// Thrown during decode()
2883 class DecodeException : XMLException
2884 { private this(string msg) @safe pure { super(msg); } }
2885
2886 /// Thrown if comparing with wrong type
2887 class InvalidTypeException : XMLException
2888 { private this(string msg) @safe pure { super(msg); } }
2889
/// Thrown when parsing for Tags. The constructor of this exception is
/// private, so only code inside this module can create one.
class TagException : XMLException
{
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2893
/**
 * Thrown during check()
 *
 * Instances can be linked through $(D err); toString() renders the whole
 * chain, the linked exception's text first, one message per line.
 */
class CheckException : XMLException
{
    CheckException err; /// Parent in hierarchy
    // Suffix of the checked text; complete() uses its length to locate the
    // failure position inside the full text.
    private string tail;
    /**
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; /// Line number at which parse failure occurred
    size_t column = 0; /// Column number at which parse failure occurred

    /*
     * Params:
     *      tail = suffix of the text being checked (see complete())
     *      msg  = production rule name or error message
     *      err  = exception to link to, or null
     */
    private this(string tail,string msg,CheckException err=null) @safe pure
    {
        // The msg field declared above hides Throwable.msg. Forward the text
        // to the base class too, so that code holding this object as a plain
        // Exception/Throwable still sees the message instead of null.
        super(msg);
        this.tail = tail;
        this.msg = msg;
        this.err = err;
    }

    /*
     * Fills in line and column (both 1-based) for this exception and for
     * every exception linked through err.
     *
     * Params:
     *      entire = the complete text; tail is taken to be its suffix
     */
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        // Everything in front of the remembered tail.
        string head = entire[0..$-tail.length];
        // Index just past the last newline in head (0 when there is none,
        // because lastIndexOf returns -1 in that case).
        ptrdiff_t n = head.lastIndexOf('\n') + 1;
        line = head.count("\n") + 1;
        // Convert to UTF-32 so the column counts code points, not code units.
        dstring t = toUTF32(head[n..$]);
        column = t.length + 1;
        if (err !is null) err.complete(entire);
    }

    /**
     * Returns: the messages of this exception and of every linked exception,
     * each terminated by a newline and prefixed with "Line L, column C: "
     * once a position has been recorded (line != 0).
     */
    override string toString() const @safe pure
    {
        import std.format : format;

        string s;
        if (line != 0) s = format("Line %d, column %d: ",line,column);
        s ~= msg;
        s ~= '\n';
        if (err !is null) s = err.toString() ~ s;
        return s;
    }
}
2942
/// Module-internal shorthand for CheckException.
private alias Err = CheckException;
2944
2945 // Private helper functions
2946
2947 private
2948 {
2949     inout(T) toType(T)(inout Object o)
2950     {
2951         T t = cast(T)(o);
2952         if (t is null)
2953         {
2954             throw new InvalidTypeException("Attempt to compare a "
2955                 ~ T.stringof ~ " with an instance of another type");
2956         }
2957         return t;
2958     }
2959
2960     string chop(ref string s, size_t n) @safe pure nothrow
2961     {
2962         if (n == -1) n = s.length;
2963         string t = s[0 .. n];
2964         s = s[n..$];
2965         return t;
2966     }
2967
2968     bool optc(ref string s, char c) @safe pure nothrow
2969     {
2970         immutable bool b = s.length != 0 && s[0] == c;
2971         if (b) s = s[1..$];
2972         return b;
2973     }
2974
2975     void reqc(ref string s, char c) @safe pure
2976     {
2977         if (s.length == 0 || s[0] != c) throw new TagException("");
2978         s = s[1..$];
2979     }
2980
2981     char requireOneOf(ref string s, string chars) @safe pure
2982     {
2983         import std.string : indexOf;
2984
2985         if (s.length == 0 || indexOf(chars,s[0]) == -1)
2986             throw new TagException("");
2987         immutable char ch = s[0];
2988         s = s[1..$];
2989         return ch;
2990     }
2991
2992     size_t hash(string s,size_t h=0) @trusted nothrow
2993     {
2994         return typeid(s).getHash(&s) + h;
2995     }
2996
2997     // Definitions from the XML specification
2998     immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
2999         0x10000,0x10FFFF];
3000     immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
3001         0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
3002         0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
3003         0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
3004         0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
3005         0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
3006         0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
3007         0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
3008         0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
3009         0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
3010         0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
3011         0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
3012         0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
3013         0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
3014         0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
3015         0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
3016         0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
3017         0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
3018         0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
3019         0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
3020         0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
3021         0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
3022         0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
3023         0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
3024         0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
3025         0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
3026         0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
3027         0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
3028         0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
3029         0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
3030         0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
3031         0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
3032         0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
3033         0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
3034         0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
3035         0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
3036         0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
3037         0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
3038         0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
3039         0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
3040         0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
3041     immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
3042     immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
3043         0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
3044         0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
3045         0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
3046         0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
3047         0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
3048         0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
3049         0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
3050         0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
3051         0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
3052         0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
3053         0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
3054         0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
3055         0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
3056         0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
3057         0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
3058         0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
3059         0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
3060         0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
3061         0x3099,0x3099,0x309A,0x309A];
3062     immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
3063         0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
3064         0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
3065         0x0ED9,0x0F20,0x0F29];
3066     immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
3067         0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
3068         0x3035,0x309D,0x309E,0x30FC,0x30FE];
3069
3070     bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
3071     {
3072         while (table.length != 0)
3073         {
3074             auto m = (table.length >> 1) & ~1;
3075             if (c < table[m])
3076             {
3077                 table = table[0 .. m];
3078             }
3079             else if (c > table[m+1])
3080             {
3081                 table = table[m+2..$];
3082             }
3083             else return true;
3084         }
3085         return false;
3086     }
3087
3088     string startOf(string s) @safe nothrow pure
3089     {
3090         string r;
3091         foreach (char c;s)
3092         {
3093             r ~= (c < 0x20 || c > 0x7F) ? '.' : c;
3094             if (r.length >= 40) { r ~= "___"; break; }
3095         }
3096         return r;
3097     }
3098
3099     void exit(string s=null)
3100     {
3101         throw new XMLException(s);
3102     }
3103 }