Fix build on sparc64-linux-gnu.
[official-gcc.git] / libphobos / src / std / xml.d
blob770c56fdbfbe8ca53de68e4eddc7bbb40ce7f39a
// Written in the D programming language.

/**
$(RED Warning: This module is considered out-dated and not up to Phobos'
      current standards. It will remain until we have a suitable replacement,
      but be aware that it will not remain long term.)

Classes and functions for creating and parsing XML

The basic architecture of this module is that there are standalone functions,
classes for constructing an XML document from scratch (Tag, Element and
Document), and also classes for parsing a pre-existing XML file (ElementParser
and DocumentParser). The parsing classes <i>may</i> be used to build a
Document, but that is not their primary purpose. The handling capabilities of
DocumentParser and ElementParser are sufficiently customizable that you can
make them do pretty much whatever you want.

Example: This example creates a DOM (Document Object Model) tree
    from an XML file.
------------------------------------------------------------------------------
import std.xml;
import std.stdio;
import std.string;
import std.file;

// books.xml is used in various samples throughout the Microsoft XML Core
// Services (MSXML) SDK.
//
// See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx

void main()
{
    string s = cast(string) std.file.read("books.xml");

    // Check for well-formedness
    check(s);

    // Make a DOM tree
    auto doc = new Document(s);

    // Plain-print it
    writeln(doc);
}
------------------------------------------------------------------------------

Example: This example does much the same thing, except that the file is
    deconstructed and reconstructed by hand. This is more work, but the
    techniques involved offer vastly more power.
------------------------------------------------------------------------------
import std.xml;
import std.stdio;
import std.string;
import std.file;

struct Book
{
    string id;
    string author;
    string title;
    string genre;
    string price;
    string pubDate;
    string description;
}

void main()
{
    string s = cast(string) std.file.read("books.xml");

    // Check for well-formedness
    check(s);

    // Take it apart
    Book[] books;

    auto xml = new DocumentParser(s);
    xml.onStartTag["book"] = (ElementParser xml)
    {
        Book book;
        book.id = xml.tag.attr["id"];

        xml.onEndTag["author"]       = (in Element e) { book.author      = e.text(); };
        xml.onEndTag["title"]        = (in Element e) { book.title       = e.text(); };
        xml.onEndTag["genre"]        = (in Element e) { book.genre       = e.text(); };
        xml.onEndTag["price"]        = (in Element e) { book.price       = e.text(); };
        xml.onEndTag["publish-date"] = (in Element e) { book.pubDate     = e.text(); };
        xml.onEndTag["description"]  = (in Element e) { book.description = e.text(); };

        xml.parse();

        books ~= book;
    };
    xml.parse();

    // Put it back together again;
    auto doc = new Document(new Tag("catalog"));
    foreach (book;books)
    {
        auto element = new Element("book");
        element.tag.attr["id"] = book.id;

        element ~= new Element("author",      book.author);
        element ~= new Element("title",       book.title);
        element ~= new Element("genre",       book.genre);
        element ~= new Element("price",       book.price);
        element ~= new Element("publish-date",book.pubDate);
        element ~= new Element("description", book.description);

        doc ~= element;
    }

    // Pretty-print it
    writeln(join(doc.pretty(3),"\n"));
}
-------------------------------------------------------------------------------
Copyright: Copyright Janice Caron 2008 - 2009.
License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
Authors:   Janice Caron
Source:    $(PHOBOSSRC std/_xml.d)
*/
/*
         Copyright Janice Caron 2008 - 2009.
Distributed under the Boost Software License, Version 1.0.
   (See accompanying file LICENSE_1_0.txt or copy at
         http://www.boost.org/LICENSE_1_0.txt)
*/
126 module std.xml;
// Literal text that opens an XML CDATA section.
enum string cdata = "<![CDATA[";
/**
 * Tests whether a code point is a legal character under the XML standard
 * (production 2, "Char").
 *
 * Accepted are tab, line feed, carriage return, everything from U+0020 up to
 * U+D7FF, and everything from U+E000 up to U+10FFFF except U+FFFE and U+FFFF.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an XML character, false otherwise
 */
bool isChar(dchar c) @safe @nogc pure nothrow // rule 2
{
    // Below the surrogate block only three control characters are allowed.
    if (c <= 0xD7FF)
        return c >= 0x20 || c == 0x9 || c == 0xA || c == 0xD;

    // U+D800 .. U+DFFF and anything past U+10FFFF are rejected.
    if (c < 0xE000 || c > 0x10FFFF)
        return false;

    // U+FFFE and U+FFFF are the only exclusions left in this range.
    return c != 0xFFFE && c != 0xFFFF;
}
@safe @nogc nothrow pure unittest
{
    // Boundary values on both sides of every range that isChar distinguishes.
    static immutable uint[] accepted =
        [0x9, 0xA, 0xD, 0x20, 'J', 0xD7FF, 0xE000, 0xFFFD, 0x10000, 0x10FFFF];
    static immutable uint[] rejected =
        [0x8, 0xB, 0xC, 0xE, 0x1F, 0xD800, 0xDFFF, 0xFFFE, 0xFFFF, 0x110000];

    foreach (codePoint; accepted)
        assert( isChar(cast(dchar) codePoint));
    foreach (codePoint; rejected)
        assert(!isChar(cast(dchar) codePoint));

    // Optional exhaustive cross-check against the lookup table.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isChar(c) == lookup(CharTable, c));
    }
}
/**
 * Tests whether a character is whitespace under the XML standard.
 *
 * XML recognises exactly four whitespace characters: space, tab,
 * carriage return and linefeed.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is one of the four XML whitespace characters
 */
bool isSpace(dchar c) @safe @nogc pure nothrow
{
    switch (c)
    {
        case '\u0020': // space
        case '\u0009': // tab
        case '\u000A': // linefeed
        case '\u000D': // carriage return
            return true;
        default:
            return false;
    }
}
/**
 * Tests whether a character is a digit under the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is U+0030 .. U+0039 or is found in DigitTable
 */
bool isDigit(dchar c) @safe @nogc pure nothrow
{
    // ASCII digits are answered directly; everything else goes to the table.
    immutable asciiDigit = c >= 0x0030 && c <= 0x0039;
    return asciiDigit || lookup(DigitTable, c);
}
@safe @nogc nothrow pure unittest
{
    // Checks that run in every build; previously this unittest asserted
    // nothing unless the debug version below was enabled.
    assert( isDigit('0'));
    assert( isDigit('5'));
    assert( isDigit('9'));
    // Neighbours of the ASCII digit range; this presumes DigitTable follows
    // the XML 1.0 "Digit" production, which lists neither of them.
    assert(!isDigit('/')); // U+002F
    assert(!isDigit(':')); // U+003A
    assert(!isDigit('a'));

    // Optional exhaustive cross-check against the lookup table.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isDigit(c) == lookup(DigitTable, c));
    }
}
/**
 * Tests whether a character is a letter under the XML standard
 * (production 84, "Letter").
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c is an ideographic character or a base character
 */
bool isLetter(dchar c) @safe @nogc nothrow pure // rule 84
{
    // The ideographic test is tried first, then the base-character test.
    if (isIdeographic(c))
        return true;
    return isBaseChar(c);
}
/**
 * Tests whether a character is an ideographic character under the XML
 * standard.
 *
 * The accepted code points are U+3007, U+3021 .. U+3029 and
 * U+4E00 .. U+9FA5.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: true if c lies in one of the ranges above
 */
bool isIdeographic(dchar c) @safe @nogc nothrow pure
{
    return c == 0x3007
        || (0x3021 <= c && c <= 0x3029)
        || (0x4E00 <= c && c <= 0x9FA5);
}
@safe @nogc nothrow pure unittest
{
    // The single code point plus both ends of each accepted range.
    static immutable dchar[] ideographs =
        ['\u4E00', '\u9FA5', '\u3007', '\u3021', '\u3029'];
    foreach (codePoint; ideographs)
        assert(isIdeographic(codePoint));

    // Optional exhaustive cross-check against the lookup table.
    debug (stdxml_TestHardcodedChecks)
    {
        foreach (c; 0 .. dchar.max + 1)
            assert(isIdeographic(c) == lookup(IdeographicTable, c));
    }
}
/**
 * Tests whether a character is a base character under the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in BaseCharTable
 */
bool isBaseChar(dchar c) @safe @nogc nothrow pure
{
    return BaseCharTable.lookup(c);
}
/**
 * Tests whether a character is a combining character under the XML
 * standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in CombiningCharTable
 */
bool isCombiningChar(dchar c) @safe @nogc nothrow pure
{
    return CombiningCharTable.lookup(c);
}
/**
 * Tests whether a character is an extender under the XML standard.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *    c = the character to be tested
 *
 * Returns: the result of looking c up in ExtenderTable
 */
bool isExtender(dchar c) @safe @nogc nothrow pure
{
    return ExtenderTable.lookup(c);
}
/**
 * Escapes a string for use in XML by substituting the predefined entities
 * for the five characters that require it.
 *
 * encode() escapes ampersand, quote, apostrophe, less-than and greater-than;
 * decode() reverses the process. Both are conveniences only: the std.xml
 * classes perform all necessary encoding and decoding themselves.
 *
 * When nothing needs escaping the original string is returned unchanged.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be encoded
 *
 * Returns: The encoded string
 *
 * Example:
 * --------------
 * writefln(encode("a > b")); // writes "a &gt; b"
 * --------------
 */
S encode(S)(S s)
{
    import std.array : appender;

    auto output = appender!S();
    size_t copiedUpTo;   // first code unit of s not yet written to output
    bool replaced;       // set once at least one entity has been emitted

    foreach (pos, unit; s)
    {
        string entity;
        switch (unit)
        {
            case '&':  entity = "&amp;";  break;
            case '"':  entity = "&quot;"; break;
            case '\'': entity = "&apos;"; break;
            case '<':  entity = "&lt;";   break;
            case '>':  entity = "&gt;";   break;
            default:   continue; // ordinary code unit, copied later in bulk
        }

        // Flush the untouched run before this unit, then the entity itself.
        output.put(s[copiedUpTo .. pos]);
        output.put(entity);
        copiedUpTo = pos + 1;
        replaced = true;
    }

    if (!replaced) return s;
    output.put(s[copiedUpTo .. $]);
    return output.data;
}
@safe pure unittest
{
    // Input with nothing to escape comes back as the very same slice.
    auto untouched = "hello";
    assert(encode(untouched) is untouched);

    // One case per predefined entity: [input, expected output].
    static immutable string[2][] cases = [
        ["a > b",     "a &gt; b"],
        ["a < b",     "a &lt; b"],
        ["don't",     "don&apos;t"],
        ["\"hi\"",    "&quot;hi&quot;"],
        ["cat & dog", "cat &amp; dog"],
    ];
    foreach (pair; cases)
    {
        string input = pair[0];
        string expected = pair[1];
        assert(encode(input) == expected, encode(input));
    }
}
/**
 * Mode to use for decoding.
 */
enum DecodeMode
{
    NONE,   /// Do not decode
    LOOSE,  /// Decode, but ignore errors
    STRICT  /// Decode, and throw exception on error
}
/**
 * Unescapes every predefined XML entity in a string.
 *
 * encode() escapes ampersand, quote, apostrophe, less-than and greater-than;
 * decode() reverses the process. Both are conveniences only: the std.xml
 * classes perform all necessary encoding and decoding themselves.
 *
 * The entities handled are &amp;amp;, &amp;quot;, &amp;apos;, &amp;lt; and
 * &amp;gt;, together with decimal and hexadecimal character references such
 * as &amp;#x20AC;
 *
 * A string that contains no ampersand is returned as is.
 *
 * The mode is one of DecodeMode.NONE (return the input untouched),
 * DecodeMode.LOOSE (decode, leaving any unrecognised ampersand in place) or
 * DecodeMode.STRICT (decode, throwing a DecodeException on an unrecognised
 * ampersand).
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Params:
 *      s = The string to be decoded
 *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
 *
 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
 *
 * Returns: The decoded string
 *
 * Example:
 * --------------
 * writefln(decode("a &gt; b")); // writes "a > b"
 * --------------
 */
string decode(string s, DecodeMode mode=DecodeMode.LOOSE) @safe pure
{
    import std.algorithm.searching : startsWith;

    if (mode == DecodeMode.NONE) return s;

    // The named entities and the single character each one stands for.
    static struct Entity { string text; char value; }
    static immutable Entity[] named = [
        Entity("&amp;",  '&'),
        Entity("&quot;", '"'),
        Entity("&apos;", '\''),
        Entity("&lt;",   '<'),
        Entity("&gt;",   '>'),
    ];

    string decoded; // remains empty until the first '&' has been met
    foreach (ref pos; 0 .. s.length)
    {
        char unit = s[pos];
        if (unit != '&')
        {
            // Copy only once decoding has actually begun.
            if (decoded.length != 0) decoded ~= unit;
            continue;
        }

        // First ampersand: seed the result with everything before it.
        if (decoded.length == 0)
            decoded = s[0 .. pos].dup;

        string rest = s[pos .. $];
        if (startsWith(rest, "&#"))
        {
            try
            {
                dchar d;
                string t = rest;
                checkCharRef(t, d); // advances t past the reference
                char[4] temp;
                import std.utf : encode;
                decoded ~= temp[0 .. encode(temp, d)];
                // Land on the last consumed unit; the loop adds one more.
                pos = s.length - t.length - 1;
            }
            catch (Err e)
            {
                if (mode == DecodeMode.STRICT)
                    throw new DecodeException("Unescaped &");
                decoded ~= '&';
            }
            continue;
        }

        bool matched = false;
        foreach (entity; named)
        {
            if (startsWith(rest, entity.text))
            {
                decoded ~= entity.value;
                // Skip the remainder of the entity; the loop adds one more.
                pos += entity.text.length - 1;
                matched = true;
                break;
            }
        }

        if (!matched)
        {
            if (mode == DecodeMode.STRICT)
                throw new DecodeException("Unescaped &");
            decoded ~= '&';
        }
    }
    return (decoded.length == 0) ? s : decoded;
}
@safe pure unittest
{
    // Passes only if STRICT decoding of the text raises DecodeException.
    void assertNot(string s) pure
    {
        bool b = false;
        try { decode(s,DecodeMode.STRICT); }
        catch (DecodeException e) { b = true; }
        assert(b,s);
    }

    // Assert that things that should work, do
    auto plain = "hello";
    assert(decode(plain, DecodeMode.STRICT) is plain);

    static immutable string[2][] wellFormed = [
        ["a &gt; b",       "a > b"],
        ["a &lt; b",       "a < b"],
        ["don&apos;t",     "don't"],
        ["&quot;hi&quot;", "\"hi\""],
        ["cat &amp; dog",  "cat & dog"],
        ["&#42;",          "*"],
        ["&#x2A;",         "*"],
    ];
    foreach (pair; wellFormed)
        assert(decode(pair[0], DecodeMode.STRICT) == pair[1]);

    // Malformed input survives LOOSE decoding unchanged ...
    static immutable string[] malformed =
        ["cat & dog", "a &gt b", "&#;", "&#x;", "&#2G;", "&#x2G;"];
    foreach (text; malformed)
        assert(decode(text, DecodeMode.LOOSE) == text);

    // ... and is rejected by STRICT decoding.
    // Assert that things that shouldn't work, don't
    foreach (text; malformed)
        assertNot(text);
}
/**
 * Class representing an XML document.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 */
class Document : Element
{
    /**
     * All text that precedes the root element.
     * Defaults to &lt;?xml version="1.0"?&gt;
     */
    string prolog = "<?xml version=\"1.0\"?>";

    /**
     * All text that follows the root element.
     * Defaults to the empty string
     */
    string epilog;

    /**
     * Builds a Document, and with it a complete DOM (Document Object Model)
     * tree, by parsing XML text.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by DocumentParser's in contract.
     *
     * Params:
     *      s = the complete XML text.
     */
    this(string s)
    in
    {
        assert(s.length != 0);
    }
    body
    {
        auto parser = new DocumentParser(s);
        string rootTagText = parser.tag.tagString;

        this(parser.tag);
        // Everything in front of the root tag's own text is the prolog.
        immutable prologLength = rootTagText.ptr - s.ptr;
        prolog = s[0 .. prologLength];
        parse(parser);
        // Whatever the parser has left is kept as the epilog.
        epilog = *parser.s;
    }

    /**
     * Builds a Document from a Tag.
     *
     * Params:
     *      tag = the start tag of the document.
     */
    this(const(Tag) tag)
    {
        super(tag);
    }

    const
    {
        /**
         * Tests two Documents for equality: prolog, element content and
         * epilog must all match.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 == d2) { }
         * --------------
         */
        override bool opEquals(scope const Object o) const
        {
            const other = toType!(const Document)(o);
            if (prolog != other.prolog)
                return false;
            if (!(cast(const) this).Element.opEquals(cast(const) other))
                return false;
            return epilog == other.epilog;
        }

        /**
         * Orders two Documents by prolog, then element content, then epilog.
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         *
         * Example:
         * --------------
         * Document d1,d2;
         * if (d1 < d2) { }
         * --------------
         */
        override int opCmp(scope const Object o) scope const
        {
            const other = toType!(const Document)(o);

            if (prolog != other.prolog)
                return prolog < other.prolog ? -1 : 1;

            immutable elementOrder = this.Element.opCmp(other);
            if (elementOrder != 0)
                return elementOrder;

            if (epilog != other.epilog)
                return epilog < other.epilog ? -1 : 1;

            return 0;
        }

        /**
         * Returns the hash of a Document
         *
         * You should rarely need to call this function. It exists so that
         * Documents can be used as associative array keys.
         */
        override size_t toHash() scope const @trusted
        {
            // Chain: element hash, mixed with the epilog, then the prolog.
            immutable elementHash = (cast() this).Element.toHash();
            immutable withEpilog = hash(epilog, elementHash);
            return hash(prolog, withEpilog);
        }

        /**
         * Returns the string representation of a Document. (That is, the
         * complete XML of a document).
         */
        override string toString() scope const @safe
        {
            string rendered = prolog;
            rendered ~= super.toString();
            rendered ~= epilog;
            return rendered;
        }
    }
}
@system unittest
{
    // https://issues.dlang.org/show_bug.cgi?id=14966
    auto source = `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;

    // Two documents parsed from the same text compare equal ...
    auto first = new Document(source);
    auto second = new Document(source);
    assert(first == second);
    assert(!(first < second));

    // ... and are interchangeable as associative array keys.
    int[Document] byDocument;
    byDocument[first] = 1;
    assert(byDocument[second] == 1);

    // Adding a child makes the second document order after the first.
    second ~= new Element("b");
    assert(first < second);
    assert(second > first);
}
/**
 * Class representing an XML element.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 */
class Element : Item
{
    Tag tag; /// The start tag of the element
    Item[] items; /// The element's items
    Text[] texts; /// The element's text items
    CData[] cdatas; /// The element's CData items
    Comment[] comments; /// The element's comments
    ProcessingInstruction[] pis; /// The element's processing instructions
    Element[] elements; /// The element's child elements

    /**
     * Builds an Element from a name and, optionally, a string that becomes
     * its Text interior.
     *
     * Params:
     *      name = the name of the element.
     *      interior = (optional) the string interior.
     *
     * Example:
     * -------------------------------------------------------
     * auto element = new Element("title","Serenity");
     *     // constructs the element <title>Serenity</title>
     * -------------------------------------------------------
     */
    this(string name, string interior=null) @safe pure
    {
        this(new Tag(name));
        if (interior.length == 0)
            return;
        opCatAssign(new Text(interior));
    }

    /**
     * Builds an Element from a Tag. The name, attributes and tag string are
     * copied; the new element starts out with an EMPTY tag.
     *
     * Params:
     *      tag_ = the start or empty tag of the element.
     */
    this(const(Tag) tag_) @safe pure
    {
        this.tag = new Tag(tag_.name, TagType.EMPTY);
        foreach (key, value; tag_.attr)
            tag.attr[key] = value;
        tag.tagString = tag_.tagString;
    }

    /**
     * Append a text item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Text("hello");
     * --------------
     */
    void opCatAssign(Text item) @safe pure
    {
        texts ~= item;
        appendItem(item);
    }

    /**
     * Append a CData item to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new CData("hello");
     * --------------
     */
    void opCatAssign(CData item) @safe pure
    {
        cdatas ~= item;
        appendItem(item);
    }

    /**
     * Append a comment to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new Comment("hello");
     * --------------
     */
    void opCatAssign(Comment item) @safe pure
    {
        comments ~= item;
        appendItem(item);
    }

    /**
     * Append a processing instruction to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * element ~= new ProcessingInstruction("hello");
     * --------------
     */
    void opCatAssign(ProcessingInstruction item) @safe pure
    {
        pis ~= item;
        appendItem(item);
    }

    /**
     * Append a complete element to the interior of this element
     *
     * Params:
     *      item = the item you wish to append.
     *
     * Example:
     * --------------
     * Element element;
     * Element other = new Element("br");
     * element ~= other;
     *    // appends element representing <br />
     * --------------
     */
    void opCatAssign(Element item) @safe pure
    {
        elements ~= item;
        appendItem(item);
    }

    // Records the item in document order. An EMPTY tag is promoted to START
    // as soon as an item that is not itself empty XML is added.
    private void appendItem(Item item) @safe pure
    {
        items ~= item;
        immutable stillEmpty = tag.type == TagType.EMPTY;
        if (stillEmpty && !item.isEmptyXML)
            tag.type = TagType.START;
    }

    // Installs handlers on the parser that append each parsed item to this
    // element (recursing for child elements), then runs the parser.
    private void parse(ElementParser xml)
    {
        xml.onText    = (string s) { opCatAssign(new Text(s)); };
        xml.onCData   = (string s) { opCatAssign(new CData(s)); };
        xml.onComment = (string s) { opCatAssign(new Comment(s)); };
        xml.onPI      = (string s) { opCatAssign(new ProcessingInstruction(s)); };

        xml.onStartTag[null] = (ElementParser childParser)
        {
            auto child = new Element(childParser.tag);
            child.parse(childParser);
            opCatAssign(child);
        };

        xml.parse();
    }

    /**
     * Tests two Elements for equality by comparing their items pairwise.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 == e2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const other = toType!(const Element)(o);
        if (items.length != other.items.length)
            return false;
        foreach (index, item; items)
        {
            if (!item.opEquals(other.items[index]))
                return false;
        }
        return true;
    }

    /**
     * Orders two Elements by their first differing item; an element whose
     * items run out first orders before the other.
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     *
     * Example:
     * --------------
     * Element e1,e2;
     * if (e1 < e2) { }
     * --------------
     */
    override int opCmp(scope const Object o) @safe const
    {
        const other = toType!(const Element)(o);
        for (uint i=0; ; ++i)
        {
            immutable mineExhausted = i == items.length;
            immutable theirsExhausted = i == other.items.length;

            if (mineExhausted && theirsExhausted) return 0;
            if (mineExhausted) return -1;
            if (theirsExhausted) return 1;

            if (!items[i].opEquals(other.items[i]))
                return items[i].opCmp(other.items[i]);
        }
    }

    /**
     * Returns the hash of an Element
     *
     * You should rarely need to call this function. It exists so that Elements
     * can be used as associative array keys.
     */
    override size_t toHash() scope const @safe
    {
        // Tag hash plus the sum of all item hashes.
        size_t total = tag.toHash();
        foreach (item; items)
            total += item.toHash();
        return total;
    }

    const
    {
        /**
         * Returns the decoded interior of an element.
         *
         * The element is assumed to contain text <i>only</i>. So, for
         * example, given XML such as "&lt;title&gt;Good &amp;amp;
         * Bad&lt;/title&gt;", will return "Good &amp; Bad".
         *
         * Params:
         *      mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
         *
         * Throws: DecodeException if decode fails, or if any item of the
         *      element is not a Text
         */
        string text(DecodeMode mode=DecodeMode.LOOSE)
        {
            string collected;
            foreach (item; items)
            {
                Text textItem = cast(Text) item;
                if (textItem is null)
                    throw new DecodeException(item.toString());
                collected ~= decode(textItem.toString(),mode);
            }
            return collected;
        }

        /**
         * Returns an indented string representation of this item
         *
         * Params:
         *      indent = (optional) number of spaces by which to indent this
         *          element. Defaults to 2.
         */
        override string[] pretty(uint indent=2) scope
        {
            import std.algorithm.searching : count;
            import std.string : rightJustify;

            if (isEmptyXML)
                return [ tag.toEmptyString() ];

            // A lone text child is kept on one line with both tags.
            if (items.length == 1)
            {
                if (auto onlyText = cast(const(Text))(items[0]))
                    return [tag.toStartString() ~ onlyText.toString() ~ tag.toEndString()];
            }

            string[] lines = [ tag.toStartString() ];
            foreach (item; items)
            {
                string[] childLines = item.pretty(indent);
                foreach (line; childLines)
                {
                    // Pad on the left by widening the line by `indent`.
                    lines ~= rightJustify(line, count(line) + indent);
                }
            }
            lines ~= tag.toEndString();
            return lines;
        }

        /**
         * Returns the string representation of an Element
         *
         * Example:
         * --------------
         * auto element = new Element("br");
         * writefln(element.toString()); // writes "<br />"
         * --------------
         */
        override string toString() scope @safe
        {
            if (isEmptyXML)
                return tag.toEmptyString();

            string rendered = tag.toStartString();
            foreach (item; items)
                rendered ~= item.toString();
            rendered ~= tag.toEndString();
            return rendered;
        }

        // An element counts as empty XML when it holds no items at all.
        override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items.length == 0; }
    }
}
/**
 * Tag types.
 */
enum TagType
{
    START, /// Used for start tags
    END,   /// Used for end tags
    EMPTY  /// Used for empty tags
}
/**
 * Class representing an XML tag.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * The class invariant guarantees
 * <ul>
 * <li> that $(B type) is a valid enum TagType value</li>
 * <li> that $(B name) consists of valid characters</li>
 * <li> that each attribute name consists of valid characters</li>
 * </ul>
 */
class Tag
{
    TagType type = TagType.START;   /// Type of tag
    string name;                    /// Tag name
    string[string] attr;            /// Associative array of attributes
    private string tagString;

    invariant()
    {
        string s;
        string t;

        assert(type == TagType.START
            || type == TagType.END
            || type == TagType.EMPTY);

        s = name;
        try { checkName(s,t); }
        catch (Err e) { assert(false,"Invalid tag name:" ~ e.toString()); }

        foreach (k,v;attr)
        {
            s = k;
            try { checkName(s,t); }
            catch (Err e)
                { assert(false,"Invalid atrribute name:" ~ e.toString()); }
        }
    }

    /**
     * Constructs an instance of Tag with a specified name and type
     *
     * The constructor does not initialize the attributes. To initialize the
     * attributes, you access the $(B attr) member variable.
     *
     * Params:
     *      name = the Tag's name
     *      type = (optional) the Tag's type. If omitted, defaults to
     *          TagType.START.
     *
     * Example:
     * --------------
     * auto tag = new Tag("img",TagType.EMPTY);
     * tag.attr["src"] = "http://example.com/example.jpg";
     * --------------
     */
    this(string name, TagType type=TagType.START) @safe pure
    {
        this.name = name;
        this.type = type;
    }

    /* Private constructor (so don't ddoc this!)
     *
     * Constructs a Tag by parsing the string representation, e.g. "<html>".
     *
     * The string is passed by reference, and is advanced over all characters
     * consumed.
     *
     * The second parameter is a dummy parameter only, required solely to
     * distinguish this constructor from the public one.
     *
     * Throws: TagException if the text is not a well-formed tag, including
     * text that ends before the tag is complete.
     */
    private this(ref string s, bool dummy) @safe pure
    {
        import std.algorithm.searching : countUntil;
        import std.ascii : isWhite;
        import std.utf : byCodeUnit;

        // countUntil yields -1 when nothing matches. Slicing with -1 would
        // raise a range error rather than a catchable parse error, so every
        // search result is checked here first.
        // NOTE(review): presumably TagException derives from XMLException,
        // in which case the handler below rewraps this with the tag text.
        static ptrdiff_t found(ptrdiff_t index) @safe pure
        {
            if (index < 0) throw new TagException("");
            return index;
        }

        tagString = s;
        try
        {
            reqc(s,'<');
            if (optc(s,'/')) type = TagType.END;

            // The name runs up to the first delimiter.
            ptrdiff_t i = found(s.byCodeUnit.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f"));
            name = s[0 .. i];
            s = s[i .. $];

            i = found(s.byCodeUnit.countUntil!(a => !isWhite(a)));
            s = s[i .. $];

            // Attributes: key, optional space, '=', optional space, quoted value.
            while (s.length > 0 && s[0] != '>' && s[0] != '/')
            {
                i = found(s.byCodeUnit.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f"));
                string key = s[0 .. i];
                s = s[i .. $];

                i = found(s.byCodeUnit.countUntil!(a => !isWhite(a)));
                s = s[i .. $];
                reqc(s,'=');
                i = found(s.byCodeUnit.countUntil!(a => !isWhite(a)));
                s = s[i .. $];

                immutable char quote = requireOneOf(s,"'\"");
                i = found(s.byCodeUnit.countUntil(quote));
                string val = decode(s[0 .. i], DecodeMode.LOOSE);
                s = s[i .. $];
                reqc(s,quote);

                i = found(s.byCodeUnit.countUntil!(a => !isWhite(a)));
                s = s[i .. $];
                attr[key] = val;
            }
            if (optc(s,'/'))
            {
                // "</name/>" is not a legal tag.
                if (type == TagType.END) throw new TagException("");
                type = TagType.EMPTY;
            }
            reqc(s,'>');
            // Trim tagString to exactly the text consumed for this tag.
            tagString.length = tagString.length - s.length;
        }
        catch (XMLException e)
        {
            tagString.length = tagString.length - s.length;
            throw new TagException(tagString);
        }
    }

    const
    {
        /**
         * Compares two Tags for equality
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         *
         * Example:
         * --------------
         * Tag tag1,tag2
         * if (tag1 == tag2) { }
         * --------------
         */
        override bool opEquals(scope Object o)
        {
            const tag = toType!(const Tag)(o);
            return name == tag.name
                && attr == tag.attr
                && type == tag.type;
        }

        /**
         * Compares two Tags
         *
         * Example:
         * --------------
         * Tag tag1,tag2
         * if (tag1 < tag2) { }
         * --------------
         */
        override int opCmp(Object o)
        {
            const tag = toType!(const Tag)(o);
            // Note that attr is an AA, so the comparison is nonsensical (bug 10381)
            return
                ((name != tag.name) ? ( name < tag.name ? -1 : 1 ) :
                ((attr != tag.attr) ? ( cast(void *) attr < cast(void*) tag.attr ? -1 : 1 ) :
                ((type != tag.type) ? ( type < tag.type ? -1 : 1 ) :
            0 )));
        }

        /**
         * Returns the hash of a Tag; only the name contributes to it.
         *
         * You should rarely need to call this function. It exists so that Tags
         * can be used as associative array keys.
         */
        override size_t toHash()
        {
            return typeid(name).getHash(&name);
        }

        /**
         * Returns the string representation of a Tag
         *
         * Example:
         * --------------
         * auto tag = new Tag("book",TagType.START);
         * writefln(tag.toString()); // writes "<book>"
         * --------------
         */
        override string toString() @safe
        {
            if (isEmpty) return toEmptyString();
            return (isEnd) ? toEndString() : toStartString();
        }

        private
        {
            // "<name" followed by every attribute, without a closing bracket.
            string toNonEndString() @safe
            {
                import std.format : format;

                string s = "<" ~ name;
                foreach (key,val;attr)
                    s ~= format(" %s=\"%s\"",key,encode(val));
                return s;
            }

            string toStartString() @safe { return toNonEndString() ~ ">"; }

            string toEndString() @safe { return "</" ~ name ~ ">"; }

            string toEmptyString() @safe { return toNonEndString() ~ " />"; }
        }

        /**
         * Returns true if the Tag is a start tag
         *
         * Example:
         * --------------
         * if (tag.isStart) { }
         * --------------
         */
        @property bool isStart() @safe @nogc pure nothrow { return type == TagType.START; }

        /**
         * Returns true if the Tag is an end tag
         *
         * Example:
         * --------------
         * if (tag.isEnd) { }
         * --------------
         */
        @property bool isEnd() @safe @nogc pure nothrow { return type == TagType.END; }

        /**
         * Returns true if the Tag is an empty tag
         *
         * Example:
         * --------------
         * if (tag.isEmpty) { }
         * --------------
         */
        @property bool isEmpty() @safe @nogc pure nothrow { return type == TagType.EMPTY; }
    }
}
/**
 * Class representing a comment
 */
class Comment : Item
{
    private string content;

    /**
     * Construct a comment
     *
     * Params:
     *      content = the body of the comment
     *
     * Throws: CommentException if the comment body is illegal (contains "--"
     * or exactly equals "-")
     *
     * Example:
     * --------------
     * auto item = new Comment("This is a comment");
     *    // constructs <!--This is a comment-->
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content == "-" || content.indexOf("--") != -1)
            throw new CommentException(content);
        this.content = content;
    }

    /**
     * Compares two comments for equality
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two comments
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value as this comment's
     * content orders before, equal to or after the other's. Zero is also
     * returned when the other item is not a Comment.
     *
     * Example:
     * --------------
     * Comment item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Comment) item;
        // The result is computed as an int throughout. Combining the null
        // test and the ordering with && would reduce it to a bool, and -1
        // could then never be returned.
        if (t is null || content == t.content)
            return 0;
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a Comment
     *
     * You should rarely need to call this function. It exists so that Comments
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this comment
     */
    override string toString() scope const @safe pure nothrow { return "<!--" ~ content ~ "-->"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
@safe unittest // issue 16241
{
    import std.exception : assertThrown;

    // "==" is a legal comment body and must be stored unchanged.
    auto comment = new Comment("==");
    assert(comment.content == "==");

    // A body containing "--" must be rejected.
    assertThrown!CommentException(new Comment("--"));
}
/**
 * Class representing a Character Data section
 */
class CData : Item
{
    private string content;

    /**
     * Construct a character data section
     *
     * Params:
     *      content = the body of the character data segment
     *
     * Throws: CDataException if the segment body is illegal (contains "]]>")
     *
     * Example:
     * --------------
     * auto item = new CData("<b>hello</b>");
     *    // constructs <![CDATA[<b>hello</b>]]>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content.indexOf("]]>") != -1) throw new CDataException(content);
        this.content = content;
    }

    /**
     * Compares two CDatas for equality
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two CDatas
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this section's
     * content sorts before, equal to or after the other's. An Item that is
     * not a CData compares as 0.
     *
     * Example:
     * --------------
     * CData item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const CData) item;
        if (t is null || content == t.content) return 0;
        // Return the sign explicitly: combining the ternary with `&&` would
        // collapse the result to a bool (0 or 1) and lose the -1 case.
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a CData
     *
     * You should rarely need to call this function. It exists so that CDatas
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this CData section
     */
    override string toString() scope const @safe pure nothrow { return cdata ~ content ~ "]]>"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
/**
 * Class representing a text (aka Parsed Character Data) section
 */
class Text : Item
{
    private string content;

    /**
     * Construct a text (aka PCData) section
     *
     * Params:
     *      content = the text. This function encodes the text before
     *      insertion, so it is safe to insert any text
     *
     * Example:
     * --------------
     * auto item = new Text("a < b");
     *    // constructs a &lt; b
     * --------------
     */
    this(string content) @safe pure
    {
        this.content = encode(content);
    }

    /**
     * Compares two text sections for equality
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two text sections
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this section's
     * (encoded) content sorts before, equal to or after the other's. An Item
     * that is not a Text compares as 0.
     *
     * Example:
     * --------------
     * Text item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const Text) item;
        if (t is null || content == t.content) return 0;
        // Return the sign explicitly: combining the ternary with `&&` would
        // collapse the result to a bool (0 or 1) and lose the -1 case.
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a text section
     *
     * You should rarely need to call this function. It exists so that Texts
     * can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this Text section
     */
    override string toString() scope const @safe @nogc pure nothrow { return content; }

    /**
     * Returns true if the content is the empty string
     */
    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content.length == 0; }
}
/**
 * Class representing an XML Instruction section
 */
class XMLInstruction : Item
{
    private string content;

    /**
     * Construct an XML Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: XIException if the segment body is illegal (contains ">")
     *
     * Example:
     * --------------
     * auto item = new XMLInstruction("ATTLIST");
     *    // constructs <!ATTLIST>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content.indexOf(">") != -1) throw new XIException(content);
        this.content = content;
    }

    /**
     * Compares two XML instructions for equality
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two XML instructions
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this
     * instruction's content sorts before, equal to or after the other's. An
     * Item that is not an XMLInstruction compares as 0.
     *
     * Example:
     * --------------
     * XMLInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const XMLInstruction) item;
        if (t is null || content == t.content) return 0;
        // Return the sign explicitly: combining the ternary with `&&` would
        // collapse the result to a bool (0 or 1) and lose the -1 case.
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of an XMLInstruction
     *
     * You should rarely need to call this function. It exists so that
     * XmlInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this XmlInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<!" ~ content ~ ">"; }

    override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
}
/**
 * Class representing a Processing Instruction section
 */
class ProcessingInstruction : Item
{
    private string content;

    /**
     * Construct a Processing Instruction section
     *
     * Params:
     *      content = the body of the instruction segment
     *
     * Throws: PIException if the segment body is illegal (contains "?>")
     *
     * Example:
     * --------------
     * auto item = new ProcessingInstruction("php");
     *    // constructs <?php?>
     * --------------
     */
    this(string content) @safe pure
    {
        import std.string : indexOf;

        if (content.indexOf("?>") != -1) throw new PIException(content);
        this.content = content;
    }

    /**
     * Compares two processing instructions for equality
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 == item2) { }
     * --------------
     */
    override bool opEquals(scope const Object o) const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        return t !is null && content == t.content;
    }

    /**
     * Compares two processing instructions
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     *
     * Returns: a negative value, zero or a positive value when this
     * instruction's content sorts before, equal to or after the other's. An
     * Item that is not a ProcessingInstruction compares as 0.
     *
     * Example:
     * --------------
     * ProcessingInstruction item1,item2;
     * if (item1 < item2) { }
     * --------------
     */
    override int opCmp(scope const Object o) scope const
    {
        const item = toType!(const Item)(o);
        const t = cast(const ProcessingInstruction) item;
        if (t is null || content == t.content) return 0;
        // Return the sign explicitly: combining the ternary with `&&` would
        // collapse the result to a bool (0 or 1) and lose the -1 case.
        return content < t.content ? -1 : 1;
    }

    /**
     * Returns the hash of a ProcessingInstruction
     *
     * You should rarely need to call this function. It exists so that
     * ProcessingInstructions can be used as associative array keys.
     */
    override size_t toHash() scope const nothrow { return hash(content); }

    /**
     * Returns a string representation of this ProcessingInstruction
     */
    override string toString() scope const @safe pure nothrow { return "<?" ~ content ~ "?>"; }

    override @property @safe @nogc pure nothrow bool isEmptyXML() scope const { return false; } /// Returns false always
}
/**
 * Abstract base class for XML items
 */
abstract class Item
{
    /// Compares with another Item of same type for equality
    abstract override bool opEquals(scope const Object o) @safe const;

    /// Compares with another Item of same type
    abstract override int opCmp(scope const Object o) @safe const;

    /// Returns the hash of this item
    abstract override size_t toHash() @safe scope const;

    /// Returns a string representation of this item
    abstract override string toString() @safe scope const;

    /**
     * Returns an indented string representation of this item
     *
     * Params:
     *      indent = number of spaces by which to indent child elements
     *               (not used by this base implementation)
     *
     * Returns: an empty array when the stripped string form is empty,
     * otherwise a one-element array holding it.
     */
    string[] pretty(uint indent) @safe scope const
    {
        import std.string : strip;

        string trimmed = strip(toString());
        if (trimmed.length == 0)
            return [];
        return [ trimmed ];
    }

    /// Returns true if the item represents empty XML text
    abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
}
/**
 * Class for parsing an XML Document.
 *
 * This is a subclass of ElementParser. Most of the useful functions are
 * documented there.
 *
 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
 *
 * Bugs:
 *      Currently only supports UTF documents.
 *
 *      If there is an encoding attribute in the prolog, it is ignored.
 */
class DocumentParser : ElementParser
{
    string xmlText;

    /**
     * Constructs a DocumentParser.
     *
     * The input to this function MUST be valid XML.
     * This is enforced by the function's in contract.
     *
     * Params:
     *      xmlText_ = the entire XML document as text
     */
    this(string xmlText_)
    in
    {
        assert(xmlText_.length != 0);
        try
        {
            // Run the well-formedness checker over the whole input
            check(xmlText_);
        }
        catch (CheckException e)
        {
            // Surface the checker's diagnosis as the assertion message
            assert(false, "\n" ~ e.toString());
        }
    }
    body
    {
        this.xmlText = xmlText_;
        this.s = &this.xmlText;
        super();    // Initialize everything
        parse();    // Parse through the root tag (but not beyond)
    }
}
@system unittest
{
    // The root has exactly one child element, and no non-element items.
    auto document = new Document("<root><child><grandchild/></child></root>");
    assert(document.elements.length == 1);
    assert(document.elements[0].tag.name == "child");
    assert(document.items == document.elements);
}
1739 * Class for parsing an XML element.
1741 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1743 * Note that you cannot construct instances of this class directly. You can
1744 * construct a DocumentParser (which is a subclass of ElementParser), but
1745 * otherwise, Instances of ElementParser will be created for you by the
1746 * library, and passed your way via onStartTag handlers.
1749 class ElementParser
1751 alias Handler = void delegate(string);
1752 alias ElementHandler = void delegate(in Element element);
1753 alias ParserHandler = void delegate(ElementParser parser);
1755 private
1757 Tag tag_;
1758 string elementStart;
1759 string* s;
1761 Handler commentHandler = null;
1762 Handler cdataHandler = null;
1763 Handler xiHandler = null;
1764 Handler piHandler = null;
1765 Handler rawTextHandler = null;
1766 Handler textHandler = null;
1768 // Private constructor for start tags
this(ElementParser parent) @safe @nogc pure nothrow
{
    // Share the parent's input cursor; the protected constructor reads it,
    // so it must be set before this() runs.
    this.s = parent.s;
    this();
    this.tag_ = parent.tag_;
}
1776 // Private constructor for empty tags
this(Tag tag, string* t) @safe @nogc pure nothrow
{
    // The protected constructor reads through s, so point it at the
    // caller-supplied string first.
    this.s = t;
    this();
    this.tag_ = tag;
}
1786 * The Tag at the start of the element being parsed. You can read this to
1787 * determine the tag's name and attributes.
@property @safe @nogc pure nothrow const(Tag) tag() const
{
    return this.tag_;
}
1792 * Register a handler which will be called whenever a start tag is
1793 * encountered which matches the specified name. You can also pass null as
1794 * the name, in which case the handler will be called for any unmatched
1795 * start tag.
1797 * Example:
1798 * --------------
1799 * // Call this function whenever a <podcast> start tag is encountered
1800 * onStartTag["podcast"] = (ElementParser xml)
1802 * // Your code here
1803 * //
1804 * // This is a closure, so code here may reference
1805 * // variables which are outside of this scope
1806 * };
1808 * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
1809 * // start tag is encountered
1810 * onStartTag["episode"] = &myEpisodeStartHandler;
1812 * // call delegate dg for all other start tags
1813 * onStartTag[null] = dg;
1814 * --------------
1816 * This library will supply your function with a new instance of
1817 * ElementParser, which may be used to parse inside the element whose
1818 * start tag was just found, or to identify the tag attributes of the
1819 * element, etc.
1821 * Note that your function will be called for both start tags and empty
1822 * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1823 * and &lt;br/&gt;.
1825 ParserHandler[string] onStartTag;
1828 * Register a handler which will be called whenever an end tag is
1829 * encountered which matches the specified name. You can also pass null as
1830 * the name, in which case the handler will be called for any unmatched
1831 * end tag.
1833 * Example:
1834 * --------------
1835 * // Call this function whenever a </podcast> end tag is encountered
1836 * onEndTag["podcast"] = (in Element e)
1838 * // Your code here
1839 * //
1840 * // This is a closure, so code here may reference
1841 * // variables which are outside of this scope
1842 * };
1844 * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
1845 * // end tag is encountered
1846 * onEndTag["episode"] = &myEpisodeEndHandler;
1848 * // call delegate dg for all other end tags
1849 * onEndTag[null] = dg;
1850 * --------------
1852 * Note that your function will be called for both end tags and empty
1853 * tags. That is, we make no distinction between &lt;br&gt;&lt;/br&gt;
1854 * and &lt;br/&gt;.
1856 ElementHandler[string] onEndTag;
protected this() @safe @nogc pure nothrow
{
    // Snapshot the input as it stands when this parser is created;
    // toString() measures progress against it.
    this.elementStart = *s;
}
1864 * Register a handler which will be called whenever text is encountered.
1866 * Example:
1867 * --------------
1868 * // Call this function whenever text is encountered
1869 * onText = (string s)
1871 * // Your code here
1873 * // The passed parameter s will have been decoded by the time you see
1874 * // it, and so may contain any character.
1875 * //
1876 * // This is a closure, so code here may reference
1877 * // variables which are outside of this scope
1878 * };
1879 * --------------
@property @safe @nogc pure nothrow void onText(Handler handler)
{
    this.textHandler = handler;
}
1884 * Register an alternative handler which will be called whenever text
1885 * is encountered. This differs from onText in that onText will decode
1886 * the text, whereas onTextRaw will not. This allows you to make design
1887 * choices, since onText will be more accurate, but slower, while
1888 * onTextRaw will be faster, but less accurate. Of course, you can
1889 * still call decode() within your handler, if you want, but you'd
1890 * probably want to use onTextRaw only in circumstances where you
1891 * know that decoding is unnecessary.
1893 * Example:
1894 * --------------
1895 * // Call this function whenever text is encountered
1896 * onTextRaw = (string s)
1898 * // Your code here
1900 * // The passed parameter s will NOT have been decoded.
1901 * //
1902 * // This is a closure, so code here may reference
1903 * // variables which are outside of this scope
1904 * };
1905 * --------------
@safe @nogc pure nothrow void onTextRaw(Handler handler)
{
    this.rawTextHandler = handler;
}
1910 * Register a handler which will be called whenever a character data
1911 * segment is encountered.
1913 * Example:
1914 * --------------
1915 * // Call this function whenever a CData section is encountered
1916 * onCData = (string s)
1918 * // Your code here
1920 * // The passed parameter s does not include the opening <![CDATA[
1921 * // nor closing ]]>
1922 * //
1923 * // This is a closure, so code here may reference
1924 * // variables which are outside of this scope
1925 * };
1926 * --------------
@property @safe @nogc pure nothrow void onCData(Handler handler)
{
    this.cdataHandler = handler;
}
1931 * Register a handler which will be called whenever a comment is
1932 * encountered.
1934 * Example:
1935 * --------------
1936 * // Call this function whenever a comment is encountered
1937 * onComment = (string s)
1939 * // Your code here
1941 * // The passed parameter s does not include the opening <!-- nor
1942 * // closing -->
1943 * //
1944 * // This is a closure, so code here may reference
1945 * // variables which are outside of this scope
1946 * };
1947 * --------------
@property @safe @nogc pure nothrow void onComment(Handler handler)
{
    this.commentHandler = handler;
}
1952 * Register a handler which will be called whenever a processing
1953 * instruction is encountered.
1955 * Example:
1956 * --------------
1957 * // Call this function whenever a processing instruction is encountered
1958 * onPI = (string s)
1960 * // Your code here
1962 * // The passed parameter s does not include the opening <? nor
1963 * // closing ?>
1964 * //
1965 * // This is a closure, so code here may reference
1966 * // variables which are outside of this scope
1967 * };
1968 * --------------
@property @safe @nogc pure nothrow void onPI(Handler handler)
{
    this.piHandler = handler;
}
1973 * Register a handler which will be called whenever an XML instruction is
1974 * encountered.
1976 * Example:
1977 * --------------
1978 * // Call this function whenever an XML instruction is encountered
1979 * // (Note: XML instructions may only occur preceding the root tag of a
1980 * // document).
1981 * onXI = (string s)
1983 * // Your code here
1985 * // The passed parameter s does not include the opening <! nor
1986 * // closing >
1987 * //
1988 * // This is a closure, so code here may reference
1989 * // variables which are outside of this scope
1990 * };
1991 * --------------
@property @safe @nogc pure nothrow void onXI(Handler handler)
{
    this.xiHandler = handler;
}
1996 * Parse an XML element.
1998 * Parsing will continue until the end of the current element. Any items
1999 * encountered for which a handler has been registered will invoke that
2000 * handler.
2002 * Throws: various kinds of XMLException
void parse()
{
    import std.algorithm.searching : startsWith;
    import std.string : indexOf;

    string chunk;
    // The tag this parser was created for; null while a derived class's
    // constructor is still looking for the root tag.
    const Tag root = tag_;
    Tag[string] openTags;
    if (tag_ !is null) openTags[tag_.name] = tag_;

    while (s.length != 0)
    {
        if ((*s).startsWith("<!--"))
        {
            // Comment: hand over the text between <!-- and -->
            chop(*s,4);
            chunk = chop(*s,indexOf(*s,"-->"));
            if (commentHandler.funcptr !is null) commentHandler(chunk);
            chop(*s,3);
        }
        else if ((*s).startsWith("<![CDATA["))
        {
            // CDATA section: hand over the text between <![CDATA[ and ]]>
            chop(*s,9);
            chunk = chop(*s,indexOf(*s,"]]>"));
            if (cdataHandler.funcptr !is null) cdataHandler(chunk);
            chop(*s,3);
        }
        else if ((*s).startsWith("<!"))
        {
            // XML instruction: hand over the text between <! and >
            chop(*s,2);
            chunk = chop(*s,indexOf(*s,">"));
            if (xiHandler.funcptr !is null) xiHandler(chunk);
            chop(*s,1);
        }
        else if ((*s).startsWith("<?"))
        {
            // Processing instruction: hand over the text between <? and ?>
            chop(*s,2);
            chunk = chop(*s,indexOf(*s,"?>"));
            if (piHandler.funcptr !is null) piHandler(chunk);
            chop(*s,2);
        }
        else if ((*s).startsWith("<"))
        {
            tag_ = new Tag(*s,true);
            if (root is null)
                return; // Return to constructor of derived class

            if (tag_.isStart)
            {
                openTags[tag_.name] = tag_;

                auto parser = new ElementParser(this);

                // Prefer the handler registered for this name, falling
                // back to the catch-all registered under null.
                auto onStart = tag_.name in onStartTag;
                if (onStart is null) onStart = null in onStartTag;
                if (onStart !is null) (*onStart)(parser);
            }
            else if (tag_.isEnd)
            {
                const startTag = openTags[tag_.name];
                string text;

                if (startTag.tagString.length == 0)
                    assert(0);

                // The element's text is whatever lies between the end of
                // the start tag and the beginning of the end tag.
                immutable(char)* p = startTag.tagString.ptr
                    + startTag.tagString.length;
                immutable(char)* q = &tag_.tagString[0];
                text = decode(p[0..(q-p)], DecodeMode.LOOSE);

                auto element = new Element(startTag);
                if (text.length != 0) element ~= new Text(text);

                auto onEnd = tag_.name in onEndTag;
                if (onEnd is null) onEnd = null in onEndTag;
                if (onEnd !is null) (*onEnd)(element);

                // Closing the tag this parser was created for ends the parse
                if (tag_.name == root.name) return;
            }
            else if (tag_.isEmpty)
            {
                Tag startTag = new Tag(tag_.name);

                // FIX by hed010gy, for bug 2979
                // http://d.puremagic.com/issues/show_bug.cgi?id=2979
                if (tag_.attr.length > 0)
                      foreach (tn,tv; tag_.attr) startTag.attr[tn]=tv;
                // END FIX

                // Handle the pretend start tag
                string s2;
                auto parser = new ElementParser(startTag,&s2);
                auto onStart = startTag.name in onStartTag;
                if (onStart is null) onStart = null in onStartTag;
                if (onStart !is null) (*onStart)(parser);

                // Handle the pretend end tag
                auto element = new Element(startTag);
                auto onEnd = tag_.name in onEndTag;
                if (onEnd is null) onEnd = null in onEndTag;
                if (onEnd !is null) (*onEnd)(element);
            }
        }
        else
        {
            // Plain text up to the next '<'. The raw handler takes
            // precedence; the decoding handler is only used without it.
            chunk = chop(*s,indexOf(*s,"<"));
            if (rawTextHandler.funcptr !is null)
                rawTextHandler(chunk);
            else if (textHandler.funcptr !is null)
                textHandler(decode(chunk,DecodeMode.LOOSE));
        }
    }
}
2134 * Returns that part of the element which has already been parsed
override string toString() const @nogc @safe pure nothrow
{
    assert(elementStart.length >= s.length);
    // Whatever is no longer in the remaining input has been consumed.
    immutable consumed = elementStart.length - s.length;
    return elementStart[0 .. consumed];
}
2144 private
// Mixed into each check function: remembers the input position on entry
// and provides fail() overloads that rewind to it before throwing.
template Check(string msg)
{
    string old = s;

    // Rewind and report a failure of this rule.
    void fail() @safe pure
    {
        s = old;
        throw new Err(s, msg);
    }

    // Rewind and report a failure of this rule caused by a nested failure.
    void fail(Err cause) @safe pure
    {
        s = old;
        throw new Err(s, msg, cause);
    }

    // Report a failure with a detail message located at the current
    // (not yet rewound) position.
    void fail(string msg2) @safe pure
    {
        auto detail = new Err(s, msg2);
        fail(detail);
    }
}
// Accepts one comment, processing instruction or run of whitespace.
void checkMisc(ref string s) @safe pure // rule 27
{
    import std.algorithm.searching : startsWith;

    mixin Check!("Misc");

    try
    {
        if (s.startsWith("<!--"))
            checkComment(s);
        else if (s.startsWith("<?"))
            checkPI(s);
        else
            checkSpace(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Accepts a prolog, the root element and any trailing Misc items.
void checkDocument(ref string s) @safe pure // rule 1
{
    mixin Check!("Document");

    try
    {
        checkProlog(s);
        checkElement(s);
        star!(checkMisc)(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Fails at the first code point for which isChar is false; leaves s
// untouched when every code point passes.
void checkChars(ref string s) @safe pure // rule 2
{
    // TO DO - Fix std.utf stride and decode functions, then use those
    // instead
    import std.format : format;

    mixin Check!("Chars");

    // Index with size_t: an int index would truncate offsets on very large
    // inputs.
    foreach (size_t i, dchar d; s)
    {
        if (!isChar(d))
        {
            // Move to the offending character so the nested error is
            // located there; fail() then rewinds s and throws.
            s = s[i .. $];
            fail(format("invalid character: U+%04X",d));
        }
    }
}
// Consumes leading whitespace; fails if there is none to consume.
void checkSpace(ref string s) @safe pure // rule 3
{
    import std.algorithm.searching : countUntil;
    import std.ascii : isWhite;
    import std.utf : byCodeUnit;

    mixin Check!("Whitespace");

    const ptrdiff_t firstNonWhite = s.byCodeUnit.countUntil!(a => !isWhite(a));
    if (firstNonWhite > -1)
    {
        s = s[firstNonWhite .. $];
    }
    else if (s.length > 0 && isWhite(s[0]))
    {
        // No non-white character at all: the whole input is whitespace
        s = s[$ .. $];
    }
    // An unchanged slice means nothing was consumed
    if (s is old) fail();
}
// Consumes an XML Name from the front of s and returns it in name.
// Fails on empty input or when the first character cannot start a name.
void checkName(ref string s, out string name) @safe pure // rule 5
{
    mixin Check!("Name");

    if (s.length == 0) fail();
    // Default to the whole input: if the loop never meets a non-name
    // character, the name runs to the end of s. (Starting from 0 would
    // yield an empty name and consume nothing.)
    size_t n = s.length;
    foreach (size_t i, dchar c; s)
    {
        // Characters allowed anywhere in a name
        if (c == '_' || c == ':' || isLetter(c)) continue;
        if (i == 0) fail();
        // Characters allowed only after the first position
        if (c == '-' || c == '.' || isDigit(c)
            || isCombiningChar(c) || isExtender(c)) continue;
        n = i;
        break;
    }
    name = s[0 .. n];
    s = s[n..$];
}
// Consumes a quoted attribute value, including both quotes. Fails when
// the value is unquoted, unterminated, contains '<', or contains a
// malformed reference.
void checkAttValue(ref string s) @safe pure // rule 10
{
    import std.algorithm.searching : countUntil;
    import std.utf : byCodeUnit;

    mixin Check!("AttValue");

    if (s.length == 0) fail();
    immutable char quote = s[0];
    if (quote != '\u0022' && quote != '\u0027')
        fail("attribute value requires quotes");
    s = s[1..$];
    for (;;)
    {
        // Stop at the closing quote, a '<' or the start of a reference.
        // countUntil yields -1 when none is found; treat that as having
        // run off the end instead of slicing with a negative index.
        immutable ptrdiff_t stop = s.byCodeUnit
            .countUntil!(a => a == quote || a == '<' || a == '&');
        s = stop == -1 ? s[$ .. $] : s[stop .. $];
        if (s.length == 0) fail("unterminated attribute value");
        if (s[0] == '<') fail("< found in attribute value");
        if (s[0] == quote) break;
        try { checkReference(s); } catch (Err e) { fail(e); }
    }
    s = s[1..$];
}
// Consumes character data up to the next '&' or '<'; fails if "]]>"
// appears first.
void checkCharData(ref string s) @safe pure // rule 14
{
    import std.algorithm.searching : startsWith;

    mixin Check!("CharData");

    for (; s.length != 0; s = s[1..$])
    {
        if (s.startsWith("&") || s.startsWith("<")) break;
        if (s.startsWith("]]>")) fail("]]> found within char data");
    }
}
// Consumes a comment. The first "--" after the opener must be the start
// of the closing "-->".
void checkComment(ref string s) @safe pure // rule 15
{
    import std.string : indexOf;

    mixin Check!("Comment");

    try
    {
        checkLiteral("<!--",s);
    }
    catch (Err cause)
    {
        fail(cause);
    }

    const ptrdiff_t dashes = s.indexOf("--");
    if (dashes == -1) fail("unterminated comment");
    s = s[dashes..$];

    try
    {
        checkLiteral("-->",s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Consumes a processing instruction, from "<?" through the next "?>".
void checkPI(ref string s) @safe pure // rule 16
{
    mixin Check!("PI");

    try
    {
        checkLiteral("<?",s);
        checkEnd("?>",s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Consumes a CDATA section, from the cdata opener through the next "]]>".
void checkCDSect(ref string s) @safe pure // rule 18
{
    mixin Check!("CDSect");

    try
    {
        checkLiteral(cdata,s);
        checkEnd("]]>",s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Consumes the prolog: optional XML declaration, Misc items, and an
// optional DOCTYPE declaration followed by more Misc items.
void checkProlog(ref string s) @safe pure // rule 22
{
    mixin Check!("Prolog");

    try
    {
        /* The XML declaration is optional
         * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
         */
        opt!(checkXMLDecl)(s);

        star!(checkMisc)(s);
        opt!(seq!(checkDocTypeDecl,star!(checkMisc)))(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Consumes the XML declaration: "<?xml", version info, optional encoding
// and standalone declarations, optional whitespace, "?>".
void checkXMLDecl(ref string s) @safe pure // rule 23
{
    mixin Check!("XMLDecl");

    try
    {
        checkLiteral("<?xml",s);
        checkVersionInfo(s);
        opt!(checkEncodingDecl)(s);
        opt!(checkSDDecl)(s);
        opt!(checkSpace)(s);
        checkLiteral("?>",s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Consumes whitespace, "version", '=' and a quoted version number.
void checkVersionInfo(ref string s) @safe pure // rule 24
{
    mixin Check!("VersionInfo");

    try
    {
        checkSpace(s);
        checkLiteral("version",s);
        checkEq(s);
        quoted!(checkVersionNum)(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Consumes '=' with optional whitespace on either side.
void checkEq(ref string s) @safe pure // rule 25
{
    mixin Check!("Eq");

    try
    {
        opt!(checkSpace)(s);
        checkLiteral("=",s);
        opt!(checkSpace)(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Consumes a version number: one or more of [a-zA-Z0-9_.:-]. Fails if no
// such character is present. The terminating quote (single or double) is
// left for the caller.
void checkVersionNum(ref string s) @safe pure // rule 26
{
    import std.algorithm.searching : countUntil;
    import std.ascii : isAlphaNum;
    import std.utf : byCodeUnit;

    mixin Check!("VersionNum");

    // Scan by character class instead of searching for '"', so that
    // single-quoted values work and a missing quote cannot produce a
    // negative slice index.
    immutable ptrdiff_t end = s.byCodeUnit.countUntil!(
        a => !(isAlphaNum(a) || a == '_' || a == '.' || a == ':' || a == '-'));
    s = end == -1 ? s[$ .. $] : s[end .. $];
    if (s is old) fail();
}
// Consumes a DOCTYPE declaration, from "<!DOCTYPE" through the next ">".
void checkDocTypeDecl(ref string s) @safe pure // rule 28
{
    mixin Check!("DocTypeDecl");

    try
    {
        checkLiteral("<!DOCTYPE",s);
        //
        // TO DO -- ensure DOCTYPE is well formed
        // (But not yet. That's one of our "future directions")
        //
        checkEnd(">",s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Consumes a standalone declaration: whitespace, "standalone", '=' and a
// quoted yes/no value.
void checkSDDecl(ref string s) @safe pure // rule 32
{
    import std.algorithm.searching : startsWith;

    mixin Check!("SDDecl");

    try
    {
        checkSpace(s);
        checkLiteral("standalone",s);
        checkEq(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }

    // Length of the quoted value, quotes included
    int valueLength = 0;
    if (s.startsWith("'yes'") || s.startsWith("\"yes\""))
        valueLength = 5;
    else if (s.startsWith("'no'" ) || s.startsWith("\"no\"" ))
        valueLength = 4;
    else
        fail("standalone attribute value must be 'yes', \"yes\","~
            " 'no' or \"no\"");
    s = s[valueLength..$];
}
// Consumes one element: either a single self-closing tag, or a start
// tag, content and an end tag whose name matches the start tag's.
void checkElement(ref string s) @safe pure // rule 39
{
    mixin Check!("Element");

    string startName, endName, kind;
    try
    {
        checkTag(s,kind,startName);
    }
    catch (Err cause)
    {
        fail(cause);
    }

    // Anything other than a start tag is complete on its own
    if (kind != "STag") return;

    string beforeEndTag;
    try
    {
        checkContent(s);
        beforeEndTag = s;
        checkETag(s,endName);
    }
    catch (Err cause)
    {
        fail(cause);
    }

    if (startName != endName)
    {
        // Locate the error at the end tag rather than after it
        s = beforeEndTag;
        fail("end tag name \"" ~ endName
            ~ "\" differs from start tag name \""~startName~"\"");
    }
}
2465 // rules 40 and 44
// Consumes a start tag or a self-closing tag. On success, type is
// "STag" for a start tag and "ETag" for a tag closed with "/>"; name
// receives the tag name.
void checkTag(ref string s, out string type, out string name) @safe pure
{
    mixin Check!("Tag");

    try
    {
        type = "STag";
        checkLiteral("<",s);
        checkName(s,name);
        star!(seq!(checkSpace,checkAttribute))(s);
        opt!(checkSpace)(s);

        const bool selfClosing = s.length != 0 && s[0] == '/';
        if (selfClosing)
        {
            s = s[1..$];
            type = "ETag";
        }
        checkLiteral(">",s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Consumes one attribute: a name, '=' and a quoted value.
void checkAttribute(ref string s) @safe pure // rule 41
{
    mixin Check!("Attribute");

    try
    {
        string attrName;
        checkName(s,attrName);
        checkEq(s);
        checkAttValue(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Consumes an end tag and returns its name in name.
void checkETag(ref string s, out string name) @safe pure // rule 42
{
    mixin Check!("ETag");

    try
    {
        checkLiteral("</",s);
        checkName(s,name);
        opt!(checkSpace)(s);
        checkLiteral(">",s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Consumes element content item by item until an end tag ("</") or the
// end of input is reached.
void checkContent(ref string s) @safe pure // rule 43
{
    import std.algorithm.searching : startsWith;

    mixin Check!("Content");

    try
    {
        while (s.length != 0)
        {
            // Advance the rewind point so a failure is reported at the
            // item that caused it, not at the start of the content.
            old = s;
            if (s.startsWith("</"))
                break;
            else if (s.startsWith("&"))
                checkReference(s);
            else if (s.startsWith("<!--"))
                checkComment(s);
            else if (s.startsWith("<?"))
                checkPI(s);
            else if (s.startsWith(cdata))
                checkCDSect(s);
            else if (s.startsWith("<"))
                checkElement(s);
            else
                checkCharData(s);
        }
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Consumes a character reference ("&#...;" or "&#x...;") and returns
// the referenced code point in c. Fails when the reference has no
// digits, is unterminated, or names a code point isChar rejects.
void checkCharRef(ref string s, out dchar c) @safe pure // rule 66
{
    import std.format : format;

    mixin Check!("CharRef");

    c = 0;
    try { checkLiteral("&#",s); } catch (Err e) { fail(e); }
    int radix = 10;
    if (s.length != 0 && s[0] == 'x')
    {
        s = s[1..$];
        radix = 16;
    }
    if (s.length == 0) fail("unterminated character reference");
    if (s[0] == ';')
        fail("character reference must have at least one digit");
    while (s.length != 0)
    {
        immutable char d = s[0];
        // Digit value of d; 100 marks "not a digit" and is >= any radix
        int n = 0;
        switch (d)
        {
            case 'F','f': ++n;      goto case;
            case 'E','e': ++n;      goto case;
            case 'D','d': ++n;      goto case;
            case 'C','c': ++n;      goto case;
            case 'B','b': ++n;      goto case;
            case 'A','a': ++n;      goto case;
            case '9':     ++n;      goto case;
            case '8':     ++n;      goto case;
            case '7':     ++n;      goto case;
            case '6':     ++n;      goto case;
            case '5':     ++n;      goto case;
            case '4':     ++n;      goto case;
            case '3':     ++n;      goto case;
            case '2':     ++n;      goto case;
            case '1':     ++n;      goto case;
            case '0':     break;
            default: n = 100; break;
        }
        if (n >= radix) break;
        c *= radix;
        c += n;
        // Reject as soon as the value leaves the Unicode range. Without
        // this, an over-long digit run wraps around the 32-bit dchar and
        // may land on a legal character.
        if (c > 0x10FFFF)
            fail(format("U+%04X is not a legal character",c));
        s = s[1..$];
    }
    if (!isChar(c)) fail(format("U+%04X is not a legal character",c));
    if (s.length == 0 || s[0] != ';') fail("expected ;");
    else s = s[1..$];
}
// Consumes a reference: a character reference when s starts with "&#",
// otherwise an entity reference.
void checkReference(ref string s) @safe pure // rule 67
{
    import std.algorithm.searching : startsWith;

    mixin Check!("Reference");

    try
    {
        dchar ignored;
        if (s.startsWith("&#"))
            checkCharRef(s,ignored);
        else
            checkEntityRef(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Consumes an entity reference: '&', a name, ';'.
void checkEntityRef(ref string s) @safe pure // rule 68
{
    mixin Check!("EntityRef");

    try
    {
        string entityName;
        checkLiteral("&",s);
        checkName(s,entityName);
        checkLiteral(";",s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
// Consumes an encoding name: at least one ASCII letter, followed by any
// number of [A-Za-z0-9._-]. The terminating quote is left for the caller.
void checkEncName(ref string s) @safe pure // rule 81
{
    import std.algorithm.searching : countUntil;
    import std.ascii : isAlpha, isAlphaNum;
    import std.utf : byCodeUnit;

    mixin Check!("EncName");

    // countUntil yields -1 when every remaining character matches; treat
    // that as "consume everything" rather than slicing with -1.
    immutable ptrdiff_t lettersEnd =
        s.byCodeUnit.countUntil!(a => !isAlpha(a));
    s = lettersEnd == -1 ? s[$ .. $] : s[lettersEnd .. $];
    if (s is old) fail();

    immutable ptrdiff_t nameEnd = s.byCodeUnit.countUntil!(
        a => !(isAlphaNum(a) || a == '_' || a == '.' || a == '-'));
    s = nameEnd == -1 ? s[$ .. $] : s[nameEnd .. $];
}
// Consumes whitespace, "encoding", '=' and a quoted encoding name.
void checkEncodingDecl(ref string s) @safe pure // rule 80
{
    mixin Check!("EncodingDecl");

    try
    {
        checkSpace(s);
        checkLiteral("encoding",s);
        checkEq(s);
        quoted!(checkEncName)(s);
    }
    catch (Err cause)
    {
        fail(cause);
    }
}
2645 // Helper functions
/**
 * Requires s to begin with the given literal and removes it from the front
 * of s.
 *
 * Params:
 *      literal = the exact text expected at the front of s
 *      s = the remaining input; advanced past the literal on success
 *
 * When the literal is absent, fail() (from the Check mixin, defined outside
 * this excerpt) is called with a message naming the expected literal.
 */
void checkLiteral(string literal,ref string s) @safe pure
{
    import std.string : startsWith;

    mixin Check!"Literal";

    immutable bool present = s.startsWith(literal);
    if (!present)
    {
        fail("Expected literal \""~literal~"\"");
    }

    s = s[literal.length .. $];
}
/**
 * Skips forward to the first occurrence of end and consumes it.
 *
 * Params:
 *      end = the terminator to search for
 *      s = the remaining input; advanced past the terminator on success
 *
 * Throws: Err if end does not occur anywhere in s.
 */
void checkEnd(string end,ref string s) @safe pure
{
    import std.string : indexOf;
    // Deliberately no mixin Check here.

    immutable ptrdiff_t pos = s.indexOf(end);
    if (pos < 0)
    {
        throw new Err(s,"Unable to find terminating \""~end~"\"");
    }

    s = s[pos .. $];
    checkLiteral(end, s);
}
2668 // Metafunctions -- none of these use mixin Check
/**
 * Optional production: runs f on s and deliberately ignores an Err raised
 * by it, so a non-matching f is not treated as a failure.
 */
void opt(alias f)(ref string s)
{
    try
    {
        f(s);
    }
    catch (Err)
    {
        // Absence of an optional item is not an error.
    }
}
/**
 * One-or-more production: f must succeed once (its failure propagates),
 * after which further repetitions are handled by star.
 */
void plus(alias f)(ref string s)
{
    // Mandatory first occurrence.
    f(s);

    // Any number of additional occurrences.
    star!f(s);
}
/**
 * Zero-or-more production: keeps applying f to s until the input is empty
 * or f raises an Err, which ends the repetition without propagating.
 */
void star(alias f)(ref string s)
{
    while (s.length > 0)
    {
        try
        {
            f(s);
        }
        catch (Err)
        {
            return;
        }
    }
}
/**
 * Applies f to the content between a matching pair of quotes.
 *
 * If s starts with a single quote, single quotes are required on both
 * sides; otherwise double quotes are required on both sides.
 */
void quoted(alias f)(ref string s)
{
    import std.string : startsWith;

    // Pick the delimiter once; the same literal opens and closes the value.
    immutable string delimiter = s.startsWith("'") ? "'" : "\"";

    checkLiteral(delimiter, s);
    f(s);
    checkLiteral(delimiter, s);
}
/**
 * Sequence production: applies f and then g to the same input, in that
 * order. A failure in either propagates to the caller.
 */
void seq(alias f,alias g)(ref string s)
{
    f(s); // first element of the sequence
    g(s); // second element, run on whatever f left behind
}
/**
 * Check an entire XML document for well-formedness
 *
 * Params:
 *      s = the document to be checked, passed as a string
 *
 * Throws: CheckException if the document is not well formed
 *
 * CheckException's toString() method will yield the complete hierarchy of
 * parse failure (the XML equivalent of a stack trace), giving the line and
 * column number of every failure at every level.
 */
void check(string s) @safe pure
{
    // Remember the untouched document. The local `s` is advanced while the
    // document is checked (the junk test below relies on that), but
    // CheckException.complete() needs the full text to turn the unparsed
    // tail into a line/column position. Passing the advanced `s` made
    // "Junk found after document" always report line 1, column 1.
    string entire = s;

    try
    {
        checkChars(s);
        checkDocument(s);
        if (s.length != 0) throw new Err(s,"Junk found after document");
    }
    catch (Err e)
    {
        // Fill in line/column information for the whole failure chain.
        e.complete(entire);
        throw e;
    }
}
// Verifies that check() rejects a document in which an end tag ("genres")
// does not match its start tag ("genre"), and that the resulting
// CheckException text names both tags.
2742 @system pure unittest
2744 import std.string : indexOf;
2748 check(q"[<?xml version="1.0"?>
2749 <catalog>
2750 <book id="bk101">
2751 <author>Gambardella, Matthew</author>
2752 <title>XML Developer's Guide</title>
2753 <genre>Computer</genre>
2754 <price>44.95</price>
2755 <publish_date>2000-10-01</publish_date>
2756 <description>An in-depth look at creating applications
2757 with XML.</description>
2758 </book>
2759 <book id="bk102">
2760 <author>Ralls, Kim</author>
2761 <title>Midnight Rain</title>
2762 <genre>Fantasy</genres>
2763 <price>5.95</price>
2764 <publish_date>2000-12-16</publish_date>
2765 <description>A former architect battles corporate zombies,
2766 an evil sorceress, and her own childhood to become queen
2767 of the world.</description>
2768 </book>
2769 <book id="bk103">
2770 <author>Corets, Eva</author>
2771 <title>Maeve Ascendant</title>
2772 <genre>Fantasy</genre>
2773 <price>5.95</price>
2774 <publish_date>2000-11-17</publish_date>
2775 <description>After the collapse of a nanotechnology
2776 society in England, the young survivors lay the
2777 foundation for a new society.</description>
2778 </book>
2779 </catalog>
2780 ]");
// Reaching this line means check() accepted the malformed document.
2781 assert(false);
2783 catch (CheckException e)
// The failure text must mention the mismatched tag names.
2785 auto n = e.toString().indexOf("end tag name \"genres\" differs"~
2786 " from start tag name \"genre\"");
2787 assert(n != -1);
// Verifies that check() accepts a small well-formed document that contains
// an XML comment between elements; any CheckException fails the test and
// its text is used as the assertion message.
2791 @system unittest
2793 string s = q"EOS
2794 <?xml version="1.0"?>
2795 <set>
2796 <one>A</one>
2797 <!-- comment -->
2798 <two>B</two>
2799 </set>
2800 EOS";
2803 check(s);
2805 catch (CheckException e)
2807 assert(0, e.toString());
// Verifies that DocumentParser reads attribute values which contain the
// other kind of quote character (single quotes inside a double-quoted value
// and vice versa), and that the start-tag handler is actually invoked.
2811 @system unittest
2813 string test_xml = `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
2814 xmlns:stream="http://etherx.'jabber'.org/streams"
2815 xmlns="jabber:'client'" from='jid.pl' id="587a5767"
2816 xml:lang="en" version="1.0" attr='a"b"c'>
2817 </stream:stream></r>`;
2819 DocumentParser parser = new DocumentParser(test_xml);
// Set by the handler so the test can tell that the handler ran.
2820 bool tested = false;
2821 parser.onStartTag["stream:stream"] = (ElementParser p) {
2822 assert(p.tag.attr["xmlns"] == "jabber:'client'");
2823 assert(p.tag.attr["from"] == "jid.pl");
2824 assert(p.tag.attr["attr"] == "a\"b\"c");
2825 tested = true;
2827 parser.parse();
2828 assert(tested);
// Verifies that the &amp; entity is decoded to "&" both in an attribute
// value (seen by the start-tag handler) and in element text (seen by the
// end-tag handler).
2831 @system unittest
2833 string s = q"EOS
2834 <?xml version="1.0" encoding="utf-8"?> <Tests>
2835 <Test thing="What &amp; Up">What &amp; Up Second</Test>
2836 </Tests>
2837 EOS";
2838 auto xml = new DocumentParser(s);
2840 xml.onStartTag["Test"] = (ElementParser xml) {
2841 assert(xml.tag.attr["thing"] == "What & Up");
2844 xml.onEndTag["Test"] = (in Element e) {
2845 assert(e.text() == "What & Up Second");
2847 xml.parse();
// Verifies that a Document built from a self-closing tag whose attribute
// contains &quot; and &gt; entities converts back to exactly the same text.
2850 @system unittest
2852 string s = `<tag attr="&quot;value&gt;" />`;
2853 auto doc = new Document(s);
2854 assert(doc.toString() == s);
/**
 * Common base class of every exception type thrown by this module.
 */
class XMLException : Exception
{
    /// Creates the exception with the given message.
    this(string msg) @safe pure
    {
        super(msg);
    }
}
2860 // Other exceptions
/// Raised by the Comment constructor.
class CommentException : XMLException
{
    // Private: not constructible from other modules.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
/// Raised by the CData constructor.
class CDataException : XMLException
{
    // Private: not constructible from other modules.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
/// Raised by the XMLInstruction constructor.
class XIException : XMLException
{
    // Private: not constructible from other modules.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
/// Raised by the ProcessingInstruction constructor.
class PIException : XMLException
{
    // Private: not constructible from other modules.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
/// Raised by the Text constructor.
class TextException : XMLException
{
    // Private: not constructible from other modules.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
/// Raised by decode().
class DecodeException : XMLException
{
    // Private: not constructible from other modules.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
/// Raised when a comparison is attempted with an object of the wrong type.
class InvalidTypeException : XMLException
{
    // Private: not constructible from other modules.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
/// Raised while parsing for Tags.
class TagException : XMLException
{
    // Private: not constructible from other modules.
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
/**
 * Thrown during check()
 *
 * Instances link to one another through err, and toString() renders the
 * linked exceptions before this one.
 */
class CheckException : XMLException
{
    CheckException err; /// Parent in hierarchy
    private string tail; // input that was still unparsed at the failure point
    /**
     * Name of production rule which failed to parse,
     * or specific error message
     */
    string msg;
    size_t line = 0; /// Line number at which parse failure occurred
    size_t column = 0; /// Column number at which parse failure occurred

    // tail: unparsed remainder of the input; msg: rule name or message;
    // err: optional linked exception.
    private this(string tail,string msg,Err err=null) @safe pure
    {
        super(null);
        this.err = err;
        this.msg = msg;
        this.tail = tail;
    }

    // Computes line and column for this exception and every linked one.
    // `entire` must be the full text of which `tail` is the trailing part.
    private void complete(string entire) @safe pure
    {
        import std.string : count, lastIndexOf;
        import std.utf : toUTF32;

        // Everything that precedes the failure point.
        string consumed = entire[0 .. $ - tail.length];

        // Offset of the first character of the line containing the failure
        // (0 when there is no newline, because lastIndexOf yields -1).
        ptrdiff_t lineStart = consumed.lastIndexOf('\n') + 1;

        line = consumed.count("\n") + 1;

        // Columns are counted in code points, hence the UTF-32 conversion.
        dstring currentLine = toUTF32(consumed[lineStart .. $]);
        column = currentLine.length + 1;

        if (err !is null)
        {
            err.complete(entire);
        }
    }

    // Renders the linked exceptions first, then this one, one per line.
    // The position prefix appears only once complete() has set `line`.
    override string toString() const @safe pure
    {
        import std.format : format;

        string text;
        if (line != 0)
        {
            text = format("Line %d, column %d: ",line,column);
        }
        text ~= msg;
        text ~= '\n';

        if (err is null)
        {
            return text;
        }
        return err.toString() ~ text;
    }
}
// Short module-internal name for CheckException; the check* routines and
// the opt/star metafunctions throw and catch the exception under this name.
2943 private alias Err = CheckException;
2945 // Private helper functions
2947 private
/**
 * Casts o to the class type T.
 *
 * Throws: InvalidTypeException when the cast yields null.
 */
inout(T) toType(T)(inout Object o)
{
    T converted = cast(T)(o);
    if (converted !is null)
    {
        return converted;
    }

    throw new InvalidTypeException("Attempt to compare a "
        ~ T.stringof ~ " with an instance of another type");
}
/**
 * Removes the first n characters from s and returns them.
 *
 * Params:
 *      s = the input; on return it holds what follows the removed prefix
 *      n = number of code units to remove; the value -1 (i.e. size_t.max)
 *          means "everything"
 *
 * Returns: the removed prefix.
 */
string chop(ref string s, size_t n) @safe pure nothrow
{
    // -1 converted to size_t is size_t.max: take the whole string.
    immutable size_t cut = (n == size_t.max) ? s.length : n;

    string front = s[0 .. cut];
    s = s[cut .. $];
    return front;
}
/**
 * Consumes the character c from the front of s if it is present.
 *
 * Returns: true if c was found and removed, false if s was left untouched.
 */
bool optc(ref string s, char c) @safe pure nothrow
{
    if (s.length == 0 || s[0] != c)
    {
        return false;
    }

    s = s[1 .. $];
    return true;
}
/**
 * Requires the character c at the front of s and removes it.
 *
 * Throws: TagException (with an empty message) if s is empty or starts
 * with a different character.
 */
void reqc(ref string s, char c) @safe pure
{
    immutable bool present = s.length != 0 && s[0] == c;
    if (!present)
    {
        throw new TagException("");
    }

    s = s[1 .. $];
}
/**
 * Requires the first character of s to be one of chars, removes it from s
 * and returns it.
 *
 * Throws: TagException (with an empty message) if s is empty or its first
 * character does not occur in chars.
 */
char requireOneOf(ref string s, string chars) @safe pure
{
    import std.string : indexOf;

    if (s.length == 0)
    {
        throw new TagException("");
    }

    immutable char first = s[0];
    if (indexOf(chars, first) == -1)
    {
        throw new TagException("");
    }

    s = s[1 .. $];
    return first;
}
/**
 * Returns the runtime's hash of s (via its TypeInfo) plus the offset h.
 *
 * Params:
 *      s = the string to hash
 *      h = value added to the string's hash (defaults to 0)
 */
size_t hash(string s,size_t h=0) @trusted nothrow
{
    immutable size_t stringHash = typeid(s).getHash(&s);
    return stringHash + h;
}
2997 // Definitions from the XML specification
// Flat list of inclusive (first, last) code point pairs in ascending order,
// the layout that lookup() searches. By its name, presumably the XML
// specification's Char class.
2998 immutable CharTable=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
2999 0x10000,0x10FFFF];
// Flat list of inclusive (first, last) code point pairs in ascending order,
// the layout that lookup() searches. By its name, presumably the XML
// specification's BaseChar class.
3000 immutable BaseCharTable=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
3001 0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
3002 0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
3003 0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
3004 0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
3005 0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
3006 0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
3007 0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
3008 0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
3009 0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
3010 0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
3011 0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
3012 0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
3013 0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
3014 0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
3015 0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
3016 0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
3017 0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
3018 0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
3019 0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
3020 0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
3021 0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
3022 0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
3023 0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
3024 0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
3025 0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
3026 0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
3027 0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
3028 0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
3029 0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
3030 0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
3031 0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
3032 0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
3033 0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
3034 0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
3035 0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
3036 0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
3037 0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
3038 0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
3039 0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
3040 0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
// Flat list of inclusive (first, last) code point pairs in ascending order,
// the layout that lookup() searches. By its name, presumably the XML
// specification's Ideographic class.
3041 immutable IdeographicTable=[0x3007,0x3007,0x3021,0x3029,0x4E00,0x9FA5];
// Flat list of inclusive (first, last) code point pairs in ascending order,
// the layout that lookup() searches. By its name, presumably the XML
// specification's CombiningChar class.
3042 immutable CombiningCharTable=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
3043 0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
3044 0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
3045 0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
3046 0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
3047 0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
3048 0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
3049 0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
3050 0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
3051 0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
3052 0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
3053 0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
3054 0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
3055 0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
3056 0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
3057 0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
3058 0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
3059 0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
3060 0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
3061 0x3099,0x3099,0x309A,0x309A];
// Flat list of inclusive (first, last) code point pairs in ascending order,
// the layout that lookup() searches. By its name, presumably the XML
// specification's Digit class.
3062 immutable DigitTable=[0x0030,0x0039,0x0660,0x0669,0x06F0,0x06F9,0x0966,
3063 0x096F,0x09E6,0x09EF,0x0A66,0x0A6F,0x0AE6,0x0AEF,0x0B66,0x0B6F,0x0BE7,
3064 0x0BEF,0x0C66,0x0C6F,0x0CE6,0x0CEF,0x0D66,0x0D6F,0x0E50,0x0E59,0x0ED0,
3065 0x0ED9,0x0F20,0x0F29];
// Flat list of inclusive (first, last) code point pairs in ascending order,
// the layout that lookup() searches. By its name, presumably the XML
// specification's Extender class.
3066 immutable ExtenderTable=[0x00B7,0x00B7,0x02D0,0x02D0,0x02D1,0x02D1,0x0387,
3067 0x0387,0x0640,0x0640,0x0E46,0x0E46,0x0EC6,0x0EC6,0x3005,0x3005,0x3031,
3068 0x3035,0x309D,0x309E,0x30FC,0x30FE];
/**
 * Binary search over a table of inclusive ranges.
 *
 * Params:
 *      table = flat array of (first, last) pairs, sorted ascending
 *      c = the value to look for
 *
 * Returns: true if c lies inside one of the ranges, false otherwise.
 */
bool lookup(const(int)[] table, int c) @safe @nogc nothrow pure
{
    // Search window [lo, hi), expressed as indices into the table.
    size_t lo = 0;
    size_t hi = table.length;

    while (lo != hi)
    {
        // Middle of the window, rounded down to the start of a pair.
        immutable size_t m = lo + (((hi - lo) >> 1) & ~cast(size_t) 1);

        if (c < table[m])
        {
            hi = m;
        }
        else if (c > table[m + 1])
        {
            lo = m + 2;
        }
        else
        {
            return true;
        }
    }
    return false;
}
/**
 * Builds a short preview of the beginning of s.
 *
 * Code units below 0x20 or above 0x7F are shown as '.'. Once the preview
 * reaches 40 characters, "___" is appended and the rest of s is ignored.
 */
string startOf(string s) @safe nothrow pure
{
    string preview;
    for (size_t i = 0; i < s.length; ++i)
    {
        immutable char c = s[i];
        immutable bool keep = c >= 0x20 && c <= 0x7F;
        preview ~= keep ? c : '.';

        if (preview.length >= 40)
        {
            preview ~= "___";
            break;
        }
    }
    return preview;
}
/**
 * Unconditionally throws an XMLException carrying the message s
 * (null by default).
 */
void exit(string s=null)
{
    auto failure = new XMLException(s);
    throw failure;
}