1 // Written in the D programming language.
4 $(RED Warning: This module is considered out-dated and not up to Phobos'
5 current standards. It will remain until we have a suitable replacement,
6 but be aware that it will not remain long term.)
8 Classes and functions for creating and parsing XML
10 The basic architecture of this module is that there are standalone functions,
11 classes for constructing an XML document from scratch (Tag, Element and
12 Document), and also classes for parsing a pre-existing XML file (ElementParser
13 and DocumentParser). The parsing classes <i>may</i> be used to build a
14 Document, but that is not their primary purpose. The handling capabilities of
15 DocumentParser and ElementParser are sufficiently customizable that you can
16 make them do pretty much whatever you want.
18 Example: This example creates a DOM (Document Object Model) tree
20 ------------------------------------------------------------------------------
26 // books.xml is used in various samples throughout the Microsoft XML Core
27 // Services (MSXML) SDK.
29 // See http://msdn2.microsoft.com/en-us/library/ms762271(VS.85).aspx
33 string s = cast(string) std.file.read("books.xml");
35 // Check for well-formedness
39 auto doc = new Document(s);
44 ------------------------------------------------------------------------------
46 Example: This example does much the same thing, except that the file is
47 deconstructed and reconstructed by hand. This is more work, but the
48 techniques involved offer vastly more power.
49 ------------------------------------------------------------------------------
67 string s = cast(string) std.file.read("books.xml");
69 // Check for well-formedness
75 auto xml = new DocumentParser(s);
76 xml.onStartTag["book"] = (ElementParser xml)
79 book.id = xml.tag.attr["id"];
81 xml.onEndTag["author"] = (in Element e) { book.author = e.text(); };
82 xml.onEndTag["title"] = (in Element e) { book.title = e.text(); };
83 xml.onEndTag["genre"] = (in Element e) { book.genre = e.text(); };
84 xml.onEndTag["price"] = (in Element e) { book.price = e.text(); };
85 xml.onEndTag["publish-date"] = (in Element e) { book.pubDate = e.text(); };
86 xml.onEndTag["description"] = (in Element e) { book.description = e.text(); };
94 // Put it back together again
95 auto doc = new Document(new Tag("catalog"));
98 auto element = new Element("book");
99 element.tag.attr["id"] = book.id;
101 element ~= new Element("author", book.author);
102 element ~= new Element("title", book.title);
103 element ~= new Element("genre", book.genre);
104 element ~= new Element("price", book.price);
105 element ~= new Element("publish-date",book.pubDate);
106 element ~= new Element("description", book.description);
112 writefln(join(doc.pretty(3),"\n"));
114 -------------------------------------------------------------------------------
115 Copyright: Copyright Janice Caron 2008 - 2009.
116 License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
117 Authors: Janice Caron
118 Source: $(PHOBOSSRC std/_xml.d)
121 Copyright Janice Caron 2008 - 2009.
122 Distributed under the Boost Software License, Version 1.0.
123 (See accompanying file LICENSE_1_0.txt or copy at
124 http://www.boost.org/LICENSE_1_0.txt)
128 enum cdata
= "<![CDATA[";
131 * Returns true if the character is a character according to the XML standard
133 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
136 * c = the character to be tested
138 bool isChar(dchar c
) @safe @nogc pure nothrow // rule 2
154 else if (0xE000 <= c
&& c
<= 0x10FFFF)
156 if ((c
& 0x1FFFFE) != 0xFFFE) // U+FFFE and U+FFFF
162 @safe @nogc nothrow pure unittest
164 assert(!isChar(cast(dchar) 0x8));
165 assert( isChar(cast(dchar) 0x9));
166 assert( isChar(cast(dchar) 0xA));
167 assert(!isChar(cast(dchar) 0xB));
168 assert(!isChar(cast(dchar) 0xC));
169 assert( isChar(cast(dchar) 0xD));
170 assert(!isChar(cast(dchar) 0xE));
171 assert(!isChar(cast(dchar) 0x1F));
172 assert( isChar(cast(dchar) 0x20));
173 assert( isChar('J'));
174 assert( isChar(cast(dchar) 0xD7FF));
175 assert(!isChar(cast(dchar) 0xD800));
176 assert(!isChar(cast(dchar) 0xDFFF));
177 assert( isChar(cast(dchar) 0xE000));
178 assert( isChar(cast(dchar) 0xFFFD));
179 assert(!isChar(cast(dchar) 0xFFFE));
180 assert(!isChar(cast(dchar) 0xFFFF));
181 assert( isChar(cast(dchar) 0x10000));
182 assert( isChar(cast(dchar) 0x10FFFF));
183 assert(!isChar(cast(dchar) 0x110000));
185 debug (stdxml_TestHardcodedChecks
)
187 foreach (c
; 0 .. dchar.max
+ 1)
188 assert(isChar(c
) == lookup(CharTable
, c
));
193 * Returns true if the character is whitespace according to the XML standard
195 * Only the following characters are considered whitespace in XML - space, tab,
196 * carriage return and linefeed
198 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
201 * c = the character to be tested
203 bool isSpace(dchar c
) @safe @nogc pure nothrow
205 return c
== '\u0020' || c
== '\u0009' || c
== '\u000A' || c
== '\u000D';
209 * Returns true if the character is a digit according to the XML standard
211 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
214 * c = the character to be tested
216 bool isDigit(dchar c
) @safe @nogc pure nothrow
218 if (c
<= 0x0039 && c
>= 0x0030)
221 return lookup(DigitTable
,c
);
224 @safe @nogc nothrow pure unittest
226 debug (stdxml_TestHardcodedChecks
)
228 foreach (c
; 0 .. dchar.max
+ 1)
229 assert(isDigit(c
) == lookup(DigitTable
, c
));
234 * Returns true if the character is a letter according to the XML standard
236 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
239 * c = the character to be tested
241 bool isLetter(dchar c
) @safe @nogc nothrow pure // rule 84
243 return isIdeographic(c
) ||
isBaseChar(c
);
247 * Returns true if the character is an ideographic character according to the
250 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
253 * c = the character to be tested
255 bool isIdeographic(dchar c
) @safe @nogc nothrow pure
259 if (c
<= 0x3029 && c
>= 0x3021 )
261 if (c
<= 0x9FA5 && c
>= 0x4E00)
266 @safe @nogc nothrow pure unittest
268 assert(isIdeographic('\u4E00'));
269 assert(isIdeographic('\u9FA5'));
270 assert(isIdeographic('\u3007'));
271 assert(isIdeographic('\u3021'));
272 assert(isIdeographic('\u3029'));
274 debug (stdxml_TestHardcodedChecks
)
276 foreach (c
; 0 .. dchar.max
+ 1)
277 assert(isIdeographic(c
) == lookup(IdeographicTable
, c
));
282 * Returns true if the character is a base character according to the XML
285 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
288 * c = the character to be tested
290 bool isBaseChar(dchar c
) @safe @nogc nothrow pure
292 return lookup(BaseCharTable
,c
);
296 * Returns true if the character is a combining character according to the
299 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
302 * c = the character to be tested
304 bool isCombiningChar(dchar c
) @safe @nogc nothrow pure
306 return lookup(CombiningCharTable
,c
);
310 * Returns true if the character is an extender according to the XML standard
312 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
315 * c = the character to be tested
317 bool isExtender(dchar c
) @safe @nogc nothrow pure
319 return lookup(ExtenderTable
,c
);
323 * Encodes a string by replacing all characters which need to be escaped with
324 * appropriate predefined XML entities.
326 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
327 * and greater-than), and similarly, decode() unescapes them. These functions
328 * are provided for convenience only. You do not need to use them when using
329 * the std.xml classes, because then all the encoding and decoding will be done
330 * for you automatically.
332 * If the string is not modified, the original will be returned.
334 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
337 * s = The string to be encoded
339 * Returns: The encoded string
343 * writefln(encode("a > b")); // writes "a &gt; b"
348 import std
.array
: appender
;
352 auto result
= appender
!S();
358 case '&': r
= "&"; break;
359 case '"': r
= """; break;
360 case '\'': r
= "'"; break;
361 case '<': r
= "<"; break;
362 case '>': r
= ">"; break;
366 result
.put(s
[lastI
.. i
]);
371 if (!result
.data
.ptr
) return s
;
372 result
.put(s
[lastI
.. $]);
379 assert(encode(s
) is s
);
380 assert(encode("a > b") == "a > b", encode("a > b"));
381 assert(encode("a < b") == "a < b");
382 assert(encode("don't") == "don't");
383 assert(encode("\"hi\"") == ""hi"", encode("\"hi\""));
384 assert(encode("cat & dog") == "cat & dog");
388 * Mode to use for decoding.
390 * $(DDOC_ENUM_MEMBERS NONE) Do not decode
391 * $(DDOC_ENUM_MEMBERS LOOSE) Decode, but ignore errors
392 * $(DDOC_ENUM_MEMBERS STRICT) Decode, and throw exception on error
400 * Decodes a string by unescaping all predefined XML entities.
402 * encode() escapes certain characters (ampersand, quote, apostrophe, less-than
403 * and greater-than), and similarly, decode() unescapes them. These functions
404 * are provided for convenience only. You do not need to use them when using
405 * the std.xml classes, because then all the encoding and decoding will be done
406 * for you automatically.
408 * This function decodes the entities &amp;, &quot;, &apos;,
409 * &lt; and &gt;,
410 * as well as decimal and hexadecimal entities such as &#x20AC;
412 * If the string does not contain an ampersand, the original will be returned.
414 * Note that the "mode" parameter can be one of DecodeMode.NONE (do not
415 * decode), DecodeMode.LOOSE (decode, but ignore errors), or DecodeMode.STRICT
416 * (decode, and throw a DecodeException in the event of an error).
418 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
421 * s = The string to be decoded
422 * mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
424 * Throws: DecodeException if mode == DecodeMode.STRICT and decode fails
426 * Returns: The decoded string
430 * writefln(decode("a &gt; b")); // writes "a > b"
433 string
decode(string s
, DecodeMode mode
=DecodeMode
.LOOSE
) @safe pure
435 import std
.algorithm
.searching
: startsWith
;
437 if (mode
== DecodeMode
.NONE
) return s
;
440 foreach (ref i
; 0 .. s
.length
)
445 if (buffer
.length
!= 0) buffer
~= c
;
449 if (buffer
.length
== 0)
451 buffer
= s
[0 .. i
].dup
;
453 if (startsWith(s
[i
..$],"&#"))
461 import std
.utf
: encode
;
462 buffer
~= temp
[0 .. encode(temp
, d
)];
463 i
= s
.length
- t
.length
- 1;
467 if (mode
== DecodeMode
.STRICT
)
468 throw new DecodeException("Unescaped &");
472 else if (startsWith(s
[i
..$],"&" )) { buffer
~= '&'; i
+= 4; }
473 else if (startsWith(s
[i
..$],""")) { buffer
~= '"'; i
+= 5; }
474 else if (startsWith(s
[i
..$],"'")) { buffer
~= '\''; i
+= 5; }
475 else if (startsWith(s
[i
..$],"<" )) { buffer
~= '<'; i
+= 3; }
476 else if (startsWith(s
[i
..$],">" )) { buffer
~= '>'; i
+= 3; }
479 if (mode
== DecodeMode
.STRICT
)
480 throw new DecodeException("Unescaped &");
485 return (buffer
.length
== 0) ? s
: buffer
;
490 void assertNot(string s
) pure
493 try { decode(s
,DecodeMode
.STRICT
); }
494 catch (DecodeException e
) { b
= true; }
498 // Assert that things that should work, do
500 assert(decode(s
, DecodeMode
.STRICT
) is s
);
501 assert(decode("a > b", DecodeMode
.STRICT
) == "a > b");
502 assert(decode("a < b", DecodeMode
.STRICT
) == "a < b");
503 assert(decode("don't", DecodeMode
.STRICT
) == "don't");
504 assert(decode(""hi"", DecodeMode
.STRICT
) == "\"hi\"");
505 assert(decode("cat & dog", DecodeMode
.STRICT
) == "cat & dog");
506 assert(decode("*", DecodeMode
.STRICT
) == "*");
507 assert(decode("*", DecodeMode
.STRICT
) == "*");
508 assert(decode("cat & dog", DecodeMode
.LOOSE
) == "cat & dog");
509 assert(decode("a > b", DecodeMode
.LOOSE
) == "a > b");
510 assert(decode("&#;", DecodeMode
.LOOSE
) == "&#;");
511 assert(decode("&#x;", DecodeMode
.LOOSE
) == "&#x;");
512 assert(decode("G;", DecodeMode
.LOOSE
) == "G;");
513 assert(decode("G;", DecodeMode
.LOOSE
) == "G;");
515 // Assert that things that shouldn't work, don't
516 assertNot("cat & dog");
517 assertNot("a > b");
525 * Class representing an XML document.
527 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
530 class Document
: Element
533 * Contains all text which occurs before the root element.
534 * Defaults to <?xml version="1.0"?>
536 string prolog
= "<?xml version=\"1.0\"?>";
538 * Contains all text which occurs after the root element.
539 * Defaults to the empty string
544 * Constructs a Document by parsing XML text.
546 * This function creates a complete DOM (Document Object Model) tree.
548 * The input to this function MUST be valid XML.
549 * This is enforced by DocumentParser's in contract.
552 * s = the complete XML text.
557 assert(s
.length
!= 0);
561 auto xml
= new DocumentParser(s
);
562 string tagString
= xml
.tag
.tagString
;
565 prolog
= s
[0 .. tagString
.ptr
- s
.ptr
];
571 * Constructs a Document from a Tag.
574 * tag = the start tag of the document.
584 * Compares two Documents for equality
592 override bool opEquals(scope const Object o
) const
594 const doc
= toType
!(const Document
)(o
);
595 return prolog
== doc
.prolog
596 && (cast(const) this).Element
.opEquals(cast(const) doc
)
597 && epilog
== doc
.epilog
;
601 * Compares two Documents
603 * You should rarely need to call this function. It exists so that
604 * Documents can be used as associative array keys.
612 override int opCmp(scope const Object o
) scope const
614 const doc
= toType
!(const Document
)(o
);
615 if (prolog
!= doc
.prolog
)
616 return prolog
< doc
.prolog ?
-1 : 1;
617 if (int cmp = this.Element
.opCmp(doc
))
619 if (epilog
!= doc
.epilog
)
620 return epilog
< doc
.epilog ?
-1 : 1;
625 * Returns the hash of a Document
627 * You should rarely need to call this function. It exists so that
628 * Documents can be used as associative array keys.
630 override size_t
toHash() scope const @trusted
632 return hash(prolog
, hash(epilog
, (cast() this).Element
.toHash()));
636 * Returns the string representation of a Document. (That is, the
637 * complete XML of a document).
639 override string
toString() scope const @safe
641 return prolog
~ super.toString() ~ epilog
;
648 // https://issues.dlang.org/show_bug.cgi?id=14966
649 auto xml
= `<?xml version="1.0" encoding="UTF-8"?><foo></foo>`;
651 auto a
= new Document(xml
);
652 auto b
= new Document(xml
);
659 b
~= new Element("b");
665 * Class representing an XML element.
667 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
671 Tag tag
; /// The start tag of the element
672 Item
[] items
; /// The element's items
673 Text
[] texts
; /// The element's text items
674 CData
[] cdatas
; /// The element's CData items
675 Comment
[] comments
; /// The element's comments
676 ProcessingInstruction
[] pis
; /// The element's processing instructions
677 Element
[] elements
; /// The element's child elements
680 * Constructs an Element given a name and a string to be used as a Text
684 * name = the name of the element.
685 * interior = (optional) the string interior.
688 * -------------------------------------------------------
689 * auto element = new Element("title","Serenity");
690 * // constructs the element <title>Serenity</title>
691 * -------------------------------------------------------
693 this(string name
, string interior
=null) @safe pure
696 if (interior
.length
!= 0) opCatAssign(new Text(interior
));
700 * Constructs an Element from a Tag.
703 * tag_ = the start or empty tag of the element.
705 this(const(Tag
) tag_
) @safe pure
707 this.tag
= new Tag(tag_
.name
);
708 tag
.type
= TagType
.EMPTY
;
709 foreach (k
,v
;tag_
.attr
) tag
.attr
[k
] = v
;
710 tag
.tagString
= tag_
.tagString
;
714 * Append a text item to the interior of this element
717 * item = the item you wish to append.
722 * element ~= new Text("hello");
725 void opCatAssign(Text item
) @safe pure
732 * Append a CData item to the interior of this element
735 * item = the item you wish to append.
740 * element ~= new CData("hello");
743 void opCatAssign(CData item
) @safe pure
750 * Append a comment to the interior of this element
753 * item = the item you wish to append.
758 * element ~= new Comment("hello");
761 void opCatAssign(Comment item
) @safe pure
768 * Append a processing instruction to the interior of this element
771 * item = the item you wish to append.
776 * element ~= new ProcessingInstruction("hello");
779 void opCatAssign(ProcessingInstruction item
) @safe pure
786 * Append a complete element to the interior of this element
789 * item = the item you wish to append.
794 * Element other = new Element("br");
796 * // appends element representing <br />
799 void opCatAssign(Element item
) @safe pure
805 private void appendItem(Item item
) @safe pure
808 if (tag
.type
== TagType
.EMPTY
&& !item
.isEmptyXML
)
809 tag
.type
= TagType
.START
;
812 private void parse(ElementParser xml
)
814 xml
.onText
= (string s
) { opCatAssign(new Text(s
)); };
815 xml
.onCData
= (string s
) { opCatAssign(new CData(s
)); };
816 xml
.onComment
= (string s
) { opCatAssign(new Comment(s
)); };
817 xml
.onPI
= (string s
) { opCatAssign(new ProcessingInstruction(s
)); };
819 xml
.onStartTag
[null] = (ElementParser xml
)
821 auto e
= new Element(xml
.tag
);
830 * Compares two Elements for equality
838 override bool opEquals(scope const Object o
) const
840 const element
= toType
!(const Element
)(o
);
841 immutable len
= items
.length
;
842 if (len
!= element
.items
.length
) return false;
843 foreach (i
; 0 .. len
)
845 if (!items
[i
].opEquals(element
.items
[i
])) return false;
851 * Compares two Elements
853 * You should rarely need to call this function. It exists so that Elements
854 * can be used as associative array keys.
862 override int opCmp(scope const Object o
) @safe const
864 const element
= toType
!(const Element
)(o
);
865 for (uint i
=0; ; ++i
)
867 if (i
== items
.length
&& i
== element
.items
.length
) return 0;
868 if (i
== items
.length
) return -1;
869 if (i
== element
.items
.length
) return 1;
870 if (!items
[i
].opEquals(element
.items
[i
]))
871 return items
[i
].opCmp(element
.items
[i
]);
876 * Returns the hash of an Element
878 * You should rarely need to call this function. It exists so that Elements
879 * can be used as associative array keys.
881 override size_t
toHash() scope const @safe
883 size_t hash
= tag
.toHash();
884 foreach (item
;items
) hash
+= item
.toHash();
891 * Returns the decoded interior of an element.
893 * The element is assumed to contain text <i>only</i>. So, for
894 * example, given XML such as "<title>Good &amp;
895 * Bad</title>", this function will return "Good & Bad".
898 * mode = (optional) Mode to use for decoding. (Defaults to LOOSE).
900 * Throws: DecodeException if decode fails
902 string
text(DecodeMode mode
=DecodeMode
.LOOSE
)
907 Text t
= cast(Text
) item
;
908 if (t
is null) throw new DecodeException(item
.toString());
909 buffer
~= decode(t
.toString(),mode
);
915 * Returns an indented string representation of this item
918 * indent = (optional) number of spaces by which to indent this
919 * element. Defaults to 2.
921 override string
[] pretty(uint indent
=2) scope
923 import std
.algorithm
.searching
: count
;
924 import std
.string
: rightJustify
;
926 if (isEmptyXML
) return [ tag
.toEmptyString() ];
928 if (items
.length
== 1)
930 auto t
= cast(const(Text
))(items
[0]);
933 return [tag
.toStartString() ~ t
.toString() ~ tag
.toEndString()];
937 string
[] a
= [ tag
.toStartString() ];
940 string
[] b
= item
.pretty(indent
);
943 a
~= rightJustify(s
,count(s
) + indent
);
946 a
~= tag
.toEndString();
951 * Returns the string representation of an Element
955 * auto element = new Element("br");
956 * writefln(element.toString()); // writes "<br />"
959 override string
toString() scope @safe
961 if (isEmptyXML
) return tag
.toEmptyString();
963 string buffer
= tag
.toStartString();
964 foreach (item
;items
) { buffer
~= item
.toString(); }
965 buffer
~= tag
.toEndString();
969 override @property @safe pure @nogc nothrow bool isEmptyXML() const scope { return items
.length
== 0; }
976 * $(DDOC_ENUM_MEMBERS START) Used for start tags
977 * $(DDOC_ENUM_MEMBERS END) Used for end tags
978 * $(DDOC_ENUM_MEMBERS EMPTY) Used for empty tags
981 enum TagType
{ START
, END
, EMPTY
}
984 * Class representing an XML tag.
986 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
988 * The class invariant guarantees
990 * <li> that $(B type) is a valid enum TagType value</li>
991 * <li> that $(B name) consists of valid characters</li>
992 * <li> that each attribute name consists of valid characters</li>
997 TagType type
= TagType
.START
; /// Type of tag
998 string name
; /// Tag name
999 string
[string
] attr
; /// Associative array of attributes
1000 private string tagString
;
1007 assert(type
== TagType
.START
1008 || type
== TagType
.END
1009 || type
== TagType
.EMPTY
);
1012 try { checkName(s
,t
); }
1013 catch (Err e
) { assert(false,"Invalid tag name:" ~ e
.toString()); }
1018 try { checkName(s
,t
); }
1020 { assert(false,"Invalid atrribute name:" ~ e
.toString()); }
1025 * Constructs an instance of Tag with a specified name and type
1027 * The constructor does not initialize the attributes. To initialize the
1028 * attributes, you access the $(B attr) member variable.
1031 * name = the Tag's name
1032 * type = (optional) the Tag's type. If omitted, defaults to
1037 * auto tag = new Tag("img",TagType.EMPTY);
1038 * tag.attr["src"] = "http://example.com/example.jpg";
1041 this(string name
, TagType type
=TagType
.START
) @safe pure
1047 /* Private constructor (so don't ddoc this!)
1049 * Constructs a Tag by parsing the string representation, e.g. "<html>".
1051 * The string is passed by reference, and is advanced over all characters
1054 * The second parameter is a dummy parameter only, required solely to
1055 * distinguish this constructor from the public one.
1057 private this(ref string s
, bool dummy
) @safe pure
1059 import std
.algorithm
.searching
: countUntil
;
1060 import std
.ascii
: isWhite
;
1061 import std
.utf
: byCodeUnit
;
1067 if (optc(s
,'/')) type
= TagType
.END
;
1068 ptrdiff_t i
= s
.byCodeUnit
.countUntil(">", "/>", " ", "\t", "\v", "\r", "\n", "\f");
1072 i
= s
.byCodeUnit
.countUntil
!(a
=> !isWhite(a
));
1075 while (s
.length
> 0 && s
[0] != '>' && s
[0] != '/')
1077 i
= s
.byCodeUnit
.countUntil("=", " ", "\t", "\v", "\r", "\n", "\f");
1078 string key
= s
[0 .. i
];
1081 i
= s
.byCodeUnit
.countUntil
!(a
=> !isWhite(a
));
1084 i
= s
.byCodeUnit
.countUntil
!(a
=> !isWhite(a
));
1087 immutable char quote
= requireOneOf(s
,"'\"");
1088 i
= s
.byCodeUnit
.countUntil(quote
);
1089 string val
= decode(s
[0 .. i
], DecodeMode
.LOOSE
);
1093 i
= s
.byCodeUnit
.countUntil
!(a
=> !isWhite(a
));
1099 if (type
== TagType
.END
) throw new TagException("");
1100 type
= TagType
.EMPTY
;
1103 tagString
.length
= tagString
.length
- s
.length
;
1105 catch (XMLException e
)
1107 tagString
.length
= tagString
.length
- s
.length
;
1108 throw new TagException(tagString
);
1115 * Compares two Tags for equality
1117 * You should rarely need to call this function. It exists so that Tags
1118 * can be used as associative array keys.
1123 * if (tag1 == tag2) { }
1126 override bool opEquals(scope Object o
)
1128 const tag
= toType
!(const Tag
)(o
);
1130 (name
!= tag
.name
) ?
false : (
1131 (attr
!= tag
.attr
) ?
false : (
1132 (type
!= tag
.type
) ?
false : (
1142 * if (tag1 < tag2) { }
1145 override int opCmp(Object o
)
1147 const tag
= toType
!(const Tag
)(o
);
1148 // Note that attr is an AA, so the comparison is nonsensical (bug 10381)
1150 ((name
!= tag
.name
) ?
( name
< tag
.name ?
-1 : 1 ) :
1151 ((attr
!= tag
.attr
) ?
( cast(void *) attr
< cast(void*) tag
.attr ?
-1 : 1 ) :
1152 ((type
!= tag
.type
) ?
( type
< tag
.type ?
-1 : 1 ) :
1157 * Returns the hash of a Tag
1159 * You should rarely need to call this function. It exists so that Tags
1160 * can be used as associative array keys.
1162 override size_t
toHash()
1164 return typeid(name
).getHash(&name
);
1168 * Returns the string representation of a Tag
1172 * auto tag = new Tag("book",TagType.START);
1173 * writefln(tag.toString()); // writes "<book>"
1176 override string
toString() @safe
1178 if (isEmpty
) return toEmptyString();
1179 return (isEnd
) ?
toEndString() : toStartString();
1184 string
toNonEndString() @safe
1186 import std
.format
: format
;
1188 string s
= "<" ~ name
;
1189 foreach (key
,val
;attr
)
1190 s
~= format(" %s=\"%s\"",key
,encode(val
));
1194 string
toStartString() @safe { return toNonEndString() ~ ">"; }
1196 string
toEndString() @safe { return "</" ~ name
~ ">"; }
1198 string
toEmptyString() @safe { return toNonEndString() ~ " />"; }
1202 * Returns true if the Tag is a start tag
1206 * if (tag.isStart) { }
1209 @property bool isStart() @safe @nogc pure nothrow { return type
== TagType
.START
; }
1212 * Returns true if the Tag is an end tag
1216 * if (tag.isEnd) { }
1219 @property bool isEnd() @safe @nogc pure nothrow { return type
== TagType
.END
; }
1222 * Returns true if the Tag is an empty tag
1226 * if (tag.isEmpty) { }
1229 @property bool isEmpty() @safe @nogc pure nothrow { return type
== TagType
.EMPTY
; }
1234 * Class representing a comment
1236 class Comment
: Item
1238 private string content
;
1241 * Construct a comment
1244 * content = the body of the comment
1246 * Throws: CommentException if the comment body is illegal (contains "--"
1247 * or exactly equals "-")
1251 * auto item = new Comment("This is a comment");
1252 * // constructs <!--This is a comment-->
1255 this(string content
) @safe pure
1257 import std
.string
: indexOf
;
1259 if (content
== "-" || content
.indexOf("--") != -1)
1260 throw new CommentException(content
);
1261 this.content
= content
;
1265 * Compares two comments for equality
1269 * Comment item1,item2;
1270 * if (item1 == item2) { }
1273 override bool opEquals(scope const Object o
) const
1275 const item
= toType
!(const Item
)(o
);
1276 const t
= cast(const Comment
) item
;
1277 return t
!is null && content
== t
.content
;
1281 * Compares two comments
1283 * You should rarely need to call this function. It exists so that Comments
1284 * can be used as associative array keys.
1288 * Comment item1,item2;
1289 * if (item1 < item2) { }
1292 override int opCmp(scope const Object o
) scope const
1294 const item
= toType
!(const Item
)(o
);
1295 const t
= cast(const Comment
) item
;
1296 return t
!is null && (content
!= t
.content
1297 ?
(content
< t
.content ?
-1 : 1 ) : 0 );
1301 * Returns the hash of a Comment
1303 * You should rarely need to call this function. It exists so that Comments
1304 * can be used as associative array keys.
1306 override size_t
toHash() scope const nothrow { return hash(content
); }
1309 * Returns a string representation of this comment
1311 override string
toString() scope const @safe pure nothrow { return "<!--" ~ content
~ "-->"; }
1313 override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
1316 @safe unittest // issue 16241
1318 import std
.exception
: assertThrown
;
1319 auto c
= new Comment("==");
1320 assert(c
.content
== "==");
1321 assertThrown
!CommentException(new Comment("--"));
1325 * Class representing a Character Data section
1329 private string content
;
1332 * Construct a character data section
1335 * content = the body of the character data segment
1337 * Throws: CDataException if the segment body is illegal (contains "]]>")
1341 * auto item = new CData("<b>hello</b>");
1342 * // constructs <![CDATA[<b>hello</b>]]>
1345 this(string content
) @safe pure
1347 import std
.string
: indexOf
;
1348 if (content
.indexOf("]]>") != -1) throw new CDataException(content
);
1349 this.content
= content
;
1353 * Compares two CDatas for equality
1357 * CData item1,item2;
1358 * if (item1 == item2) { }
1361 override bool opEquals(scope const Object o
) const
1363 const item
= toType
!(const Item
)(o
);
1364 const t
= cast(const CData
) item
;
1365 return t
!is null && content
== t
.content
;
1369 * Compares two CDatas
1371 * You should rarely need to call this function. It exists so that CDatas
1372 * can be used as associative array keys.
1376 * CData item1,item2;
1377 * if (item1 < item2) { }
1380 override int opCmp(scope const Object o
) scope const
1382 const item
= toType
!(const Item
)(o
);
1383 const t
= cast(const CData
) item
;
1384 return t
!is null && (content
!= t
.content
1385 ?
(content
< t
.content ?
-1 : 1 ) : 0 );
1389 * Returns the hash of a CData
1391 * You should rarely need to call this function. It exists so that CDatas
1392 * can be used as associative array keys.
1394 override size_t
toHash() scope const nothrow { return hash(content
); }
1397 * Returns a string representation of this CData section
1399 override string
toString() scope const @safe pure nothrow { return cdata
~ content
~ "]]>"; }
1401 override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return false; } /// Returns false always
1405 * Class representing a text (aka Parsed Character Data) section
1409 private string content
;
1412 * Construct a text (aka PCData) section
1415 * content = the text. This function encodes the text before
1416 * insertion, so it is safe to insert any text
1420 * auto item = new Text("a < b");
1421 * // constructs a &lt; b
1424 this(string content
) @safe pure
1426 this.content
= encode(content
);
1430 * Compares two text sections for equality
1435 * if (item1 == item2) { }
1438 override bool opEquals(scope const Object o
) const
1440 const item
= toType
!(const Item
)(o
);
1441 const t
= cast(const Text
) item
;
1442 return t
!is null && content
== t
.content
;
1446 * Compares two text sections
1448 * You should rarely need to call this function. It exists so that Texts
1449 * can be used as associative array keys.
1454 * if (item1 < item2) { }
1457 override int opCmp(scope const Object o
) scope const
1459 const item
= toType
!(const Item
)(o
);
1460 const t
= cast(const Text
) item
;
1462 && (content
!= t
.content ?
(content
< t
.content ?
-1 : 1 ) : 0 );
1466 * Returns the hash of a text section
1468 * You should rarely need to call this function. It exists so that Texts
1469 * can be used as associative array keys.
1471 override size_t
toHash() scope const nothrow { return hash(content
); }
1474 * Returns a string representation of this Text section
1476 override string
toString() scope const @safe @nogc pure nothrow { return content
; }
1479 * Returns true if the content is the empty string
1481 override @property @safe @nogc pure nothrow scope bool isEmptyXML() const { return content
.length
== 0; }
1485 * Class representing an XML Instruction section
1487 class XMLInstruction
: Item
1489 private string content
;
1492 * Construct an XML Instruction section
1495 * content = the body of the instruction segment
1497 * Throws: XIException if the segment body is illegal (contains ">")
1501 * auto item = new XMLInstruction("ATTLIST");
1502 * // constructs <!ATTLIST>
1505 this(string content
) @safe pure
1507 import std
.string
: indexOf
;
1508 if (content
.indexOf(">") != -1) throw new XIException(content
);
1509 this.content
= content
;
1513 * Compares two XML instructions for equality
1517 * XMLInstruction item1,item2;
1518 * if (item1 == item2) { }
1521 override bool opEquals(scope const Object o
) const
1523 const item
= toType
!(const Item
)(o
);
1524 const t
= cast(const XMLInstruction
) item
;
1525 return t
!is null && content
== t
.content
;
1529 * Compares two XML instructions
1531 * You should rarely need to call this function. It exists so that
1532 * XMLInstructions can be used as associative array keys.
1536 * XMLInstruction item1,item2;
1537 * if (item1 < item2) { }
1540 override int opCmp(scope const Object o
) scope const
1542 const item
= toType
!(const Item
)(o
);
1543 const t
= cast(const XMLInstruction
) item
;
1545 && (content
!= t
.content ?
(content
< t
.content ?
-1 : 1 ) : 0 );
1549 * Returns the hash of an XMLInstruction
1551 * You should rarely need to call this function. It exists so that
1552 * XMLInstructions can be used as associative array keys.
override size_t toHash() scope const nothrow
{
    // Hash the instruction body through the file's hash() helper, leaving
    // its second argument at the default of 0.
    immutable digest = hash(content);
    return digest;
}
1557 * Returns a string representation of this XMLInstruction
override string toString() const scope pure nothrow @safe
{
    // Re-wrap the stored body in the <! ... > delimiters.
    return "<!" ~ content ~ ">";
}
/// Returns false always
override scope bool isEmptyXML() const @property @safe @nogc pure nothrow
{
    return false;
}
1565 * Class representing a Processing Instruction section
1567 class ProcessingInstruction
: Item
1569 private string content
;
1572 * Construct a Processing Instruction section
1575 * content = the body of the instruction segment
1577 * Throws: PIException if the segment body is illegal (contains "?>")
1581 * auto item = new ProcessingInstruction("php");
1582 * // constructs <?php?>
this(string content) @safe pure
{
    import std.string : indexOf;

    // toString() wraps the body as "<?" ~ content ~ "?>", so a "?>" inside
    // the body is rejected up front.
    immutable terminatorAt = content.indexOf("?>");
    if (terminatorAt >= 0)
        throw new PIException(content);

    this.content = content;
}
1593 * Compares two processing instructions for equality
1597 * ProcessingInstruction item1,item2;
1598 * if (item1 == item2) { }
override bool opEquals(scope const Object o) const
{
    // toType (defined later in this file) may throw InvalidTypeException.
    const asItem = toType!(const Item)(o);
    const other = cast(const ProcessingInstruction) asItem;

    // An Item that is not a ProcessingInstruction compares unequal.
    if (other is null)
        return false;
    return content == other.content;
}
1609 * Compares two processing instructions
1611 * You should rarely need to call this function. It exists so that
1612 * ProcessingInstructions can be used as associative array keys.
1616 * ProcessingInstruction item1,item2;
1617 * if (item1 < item2) { }
1620 override int opCmp(scope const Object o
) scope const
1622 const item
= toType
!(const Item
)(o
);
1623 const t
= cast(const ProcessingInstruction
) item
;
1625 && (content
!= t
.content ?
(content
< t
.content ?
-1 : 1 ) : 0 );
1629 * Returns the hash of a ProcessingInstruction
1631 * You should rarely need to call this function. It exists so that
1632 * ProcessingInstructions can be used as associative array keys.
override size_t toHash() scope const nothrow
{
    // Hash the instruction body through the file's hash() helper, leaving
    // its second argument at the default of 0.
    immutable digest = hash(content);
    return digest;
}
1637 * Returns a string representation of this ProcessingInstruction
override string toString() const scope pure nothrow @safe
{
    // Re-wrap the stored body in the <? ... ?> delimiters.
    return "<?" ~ content ~ "?>";
}
/// Returns false always
override bool isEmptyXML() scope const @property @safe @nogc pure nothrow
{
    return false;
}
1645 * Abstract base class for XML items
1649 /// Compares with another Item of same type for equality
1650 abstract override bool opEquals(scope const Object o
) @safe const;
1652 /// Compares with another Item of same type
1653 abstract override int opCmp(scope const Object o
) @safe const;
1655 /// Returns the hash of this item
1656 abstract override size_t
toHash() @safe scope const;
1658 /// Returns a string representation of this item
1659 abstract override string
toString() @safe scope const;
1662 * Returns an indented string representation of this item
1665 * indent = number of spaces by which to indent child elements
string[] pretty(uint indent) @safe scope const
{
    import std.string : strip;

    // This implementation never reads `indent`: it yields the stripped
    // string form as a single line, or no lines when that is empty.
    string trimmed = strip(toString());
    if (trimmed.length == 0)
        return [];
    return [ trimmed ];
}
1674 /// Returns true if the item represents empty XML text
1675 abstract @property @safe @nogc pure nothrow bool isEmptyXML() scope const;
1679 * Class for parsing an XML Document.
1681 * This is a subclass of ElementParser. Most of the useful functions are
1684 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1687 * Currently only supports UTF documents.
1689 * If there is an encoding attribute in the prolog, it is ignored.
1692 class DocumentParser
: ElementParser
1697 * Constructs a DocumentParser.
1699 * The input to this function MUST be valid XML.
1700 * This is enforced by the function's in contract.
1703 * xmlText_ = the entire XML document as text
1706 this(string xmlText_
)
1709 assert(xmlText_
.length
!= 0);
1712 // Confirm that the input is valid XML
1715 catch (CheckException e
)
1717 // And if it's not, tell the user why not
1718 assert(false, "\n" ~ e
.toString());
1725 super(); // Initialize everything
1726 parse(); // Parse through the root tag (but not beyond)
1732 auto doc
= new Document("<root><child><grandchild/></child></root>");
1733 assert(doc
.elements
.length
== 1);
1734 assert(doc
.elements
[0].tag
.name
== "child");
1735 assert(doc
.items
== doc
.elements
);
1739 * Class for parsing an XML element.
1741 * Standards: $(LINK2 http://www.w3.org/TR/1998/REC-xml-19980210, XML 1.0)
1743 * Note that you cannot construct instances of this class directly. You can
1744 * construct a DocumentParser (which is a subclass of ElementParser), but
1745 * otherwise, instances of ElementParser will be created for you by the
1746 * library, and passed your way via onStartTag handlers.
1751 alias Handler
= void delegate(string
);
1752 alias ElementHandler
= void delegate(in Element element
);
1753 alias ParserHandler
= void delegate(ElementParser parser
);
1758 string elementStart
;
1761 Handler commentHandler
= null;
1762 Handler cdataHandler
= null;
1763 Handler xiHandler
= null;
1764 Handler piHandler
= null;
1765 Handler rawTextHandler
= null;
1766 Handler textHandler
= null;
1768 // Private constructor for start tags
1769 this(ElementParser parent
) @safe @nogc pure nothrow
1776 // Private constructor for empty tags
1777 this(Tag tag
, string
* t
) @safe @nogc pure nothrow
1786 * The Tag at the start of the element being parsed. You can read this to
1787 * determine the tag's name and attributes.
const(Tag) tag() const @property @safe @nogc pure nothrow
{
    // Read-only view of the tag_ field.
    return tag_;
}
1792 * Register a handler which will be called whenever a start tag is
1793 * encountered which matches the specified name. You can also pass null as
1794 * the name, in which case the handler will be called for any unmatched
1799 * // Call this function whenever a <podcast> start tag is encountered
1800 * onStartTag["podcast"] = (ElementParser xml)
1804 * // This is a closure, so code here may reference
1805 * // variables which are outside of this scope
1808 * // call myEpisodeStartHandler (defined elsewhere) whenever an <episode>
1809 * // start tag is encountered
1810 * onStartTag["episode"] = &myEpisodeStartHandler;
1812 * // call delegate dg for all other start tags
1813 * onStartTag[null] = dg;
1816 * This library will supply your function with a new instance of
1817 * ElementParser, which may be used to parse inside the element whose
1818 * start tag was just found, or to identify the tag attributes of the
1821 * Note that your function will be called for both start tags and empty
1822 * tags. That is, we make no distinction between <br></br>
1825 ParserHandler
[string
] onStartTag
;
1828 * Register a handler which will be called whenever an end tag is
1829 * encountered which matches the specified name. You can also pass null as
1830 * the name, in which case the handler will be called for any unmatched
1835 * // Call this function whenever a </podcast> end tag is encountered
1836 * onEndTag["podcast"] = (in Element e)
1840 * // This is a closure, so code here may reference
1841 * // variables which are outside of this scope
1844 * // call myEpisodeEndHandler (defined elsewhere) whenever an </episode>
1845 * // end tag is encountered
1846 * onEndTag["episode"] = &myEpisodeEndHandler;
1848 * // call delegate dg for all other end tags
1849 * onEndTag[null] = dg;
1852 * Note that your function will be called for both end tags and empty
1853 * tags. That is, we make no distinction between <br></br>
1856 ElementHandler
[string
] onEndTag
;
1858 protected this() @safe @nogc pure nothrow
1864 * Register a handler which will be called whenever text is encountered.
1868 * // Call this function whenever text is encountered
1869 * onText = (string s)
1873 * // The passed parameter s will have been decoded by the time you see
1874 * // it, and so may contain any character.
1876 * // This is a closure, so code here may reference
1877 * // variables which are outside of this scope
void onText(Handler handler) @property @safe @nogc pure nothrow
{
    // Stored only; parse() later tests textHandler.funcptr against null
    // before calling it.
    textHandler = handler;
}
1884 * Register an alternative handler which will be called whenever text
1885 * is encountered. This differs from onText in that onText will decode
1886 * the text, whereas onTextRaw will not. This allows you to make design
1887 * choices, since onText will be more accurate, but slower, while
1888 * onTextRaw will be faster, but less accurate. Of course, you can
1889 * still call decode() within your handler, if you want, but you'd
1890 * probably want to use onTextRaw only in circumstances where you
1891 * know that decoding is unnecessary.
1895 * // Call this function whenever text is encountered
1896 * onTextRaw = (string s)
1900 * // The passed parameter s will NOT have been decoded.
1902 * // This is a closure, so code here may reference
1903 * // variables which are outside of this scope
@property void onTextRaw(Handler handler) @safe @nogc pure nothrow
{
    // Marked @property like the sibling setters (onText, onCData, onComment,
    // onPI, onXI), which are all used with assignment syntax.
    //
    // parse() tests rawTextHandler before textHandler in an if / else-if
    // chain, so textHandler is not consulted while a raw handler is set.
    rawTextHandler = handler;
}
1910 * Register a handler which will be called whenever a character data
1911 * segment is encountered.
1915 * // Call this function whenever a CData section is encountered
1916 * onCData = (string s)
1920 * // The passed parameter s does not include the opening <![CDATA[
1921 * // nor closing ]]>
1923 * // This is a closure, so code here may reference
1924 * // variables which are outside of this scope
void onCData(Handler handler) @property @safe @nogc pure nothrow
{
    // Stored only; parse() later tests cdataHandler.funcptr against null
    // before calling it.
    cdataHandler = handler;
}
1931 * Register a handler which will be called whenever a comment is
1936 * // Call this function whenever a comment is encountered
1937 * onComment = (string s)
1941 * // The passed parameter s does not include the opening <!-- nor
1944 * // This is a closure, so code here may reference
1945 * // variables which are outside of this scope
void onComment(Handler handler) @property @safe @nogc pure nothrow
{
    // Stored only; parse() later tests commentHandler.funcptr against null
    // before calling it.
    commentHandler = handler;
}
1952 * Register a handler which will be called whenever a processing
1953 * instruction is encountered.
1957 * // Call this function whenever a processing instruction is encountered
1962 * // The passed parameter s does not include the opening <? nor
1965 * // This is a closure, so code here may reference
1966 * // variables which are outside of this scope
void onPI(Handler handler) @property @safe @nogc pure nothrow
{
    // Stored only; parse() later tests piHandler.funcptr against null
    // before calling it.
    piHandler = handler;
}
1973 * Register a handler which will be called whenever an XML instruction is
1978 * // Call this function whenever an XML instruction is encountered
1979 * // (Note: XML instructions may only occur preceding the root tag of a
1985 * // The passed parameter s does not include the opening <! nor
1988 * // This is a closure, so code here may reference
1989 * // variables which are outside of this scope
void onXI(Handler handler) @property @safe @nogc pure nothrow
{
    // Stored only; parse() later tests xiHandler.funcptr against null
    // before calling it.
    xiHandler = handler;
}
1996 * Parse an XML element.
1998 * Parsing will continue until the end of the current element. Any items
1999 * encountered for which a handler has been registered will invoke that
2002 * Throws: various kinds of XMLException
2006 import std
.algorithm
.searching
: startsWith
;
2007 import std
.string
: indexOf
;
2010 const Tag root
= tag_
;
2011 Tag
[string
] startTags
;
2012 if (tag_
!is null) startTags
[tag_
.name
] = tag_
;
2014 while (s
.length
!= 0)
2016 if (startsWith(*s
,"<!--"))
2019 t
= chop(*s
,indexOf(*s
,"-->"));
2020 if (commentHandler
.funcptr
!is null) commentHandler(t
);
2023 else if (startsWith(*s
,"<![CDATA["))
2026 t
= chop(*s
,indexOf(*s
,"]]>"));
2027 if (cdataHandler
.funcptr
!is null) cdataHandler(t
);
2030 else if (startsWith(*s
,"<!"))
2033 t
= chop(*s
,indexOf(*s
,">"));
2034 if (xiHandler
.funcptr
!is null) xiHandler(t
);
2037 else if (startsWith(*s
,"<?"))
2040 t
= chop(*s
,indexOf(*s
,"?>"));
2041 if (piHandler
.funcptr
!is null) piHandler(t
);
2044 else if (startsWith(*s
,"<"))
2046 tag_
= new Tag(*s
,true);
2048 return; // Return to constructor of derived class
2052 startTags
[tag_
.name
] = tag_
;
2054 auto parser
= new ElementParser(this);
2056 auto handler
= tag_
.name
in onStartTag
;
2057 if (handler
!is null) (*handler
)(parser
);
2060 handler
= null in onStartTag
;
2061 if (handler
!is null) (*handler
)(parser
);
2064 else if (tag_
.isEnd
)
2066 const startTag
= startTags
[tag_
.name
];
2069 if (startTag
.tagString
.length
== 0)
2072 immutable(char)* p
= startTag
.tagString
.ptr
2073 + startTag
.tagString
.length
;
2074 immutable(char)* q
= &tag_
.tagString
[0];
2075 text
= decode(p
[0..(q
-p
)], DecodeMode
.LOOSE
);
2077 auto element
= new Element(startTag
);
2078 if (text
.length
!= 0) element
~= new Text(text
);
2080 auto handler
= tag_
.name
in onEndTag
;
2081 if (handler
!is null) (*handler
)(element
);
2084 handler
= null in onEndTag
;
2085 if (handler
!is null) (*handler
)(element
);
2088 if (tag_
.name
== root
.name
) return;
2090 else if (tag_
.isEmpty
)
2092 Tag startTag
= new Tag(tag_
.name
);
2094 // FIX by hed010gy, for bug 2979
2095 // http://d.puremagic.com/issues/show_bug.cgi?id=2979
2096 if (tag_
.attr
.length
> 0)
2097 foreach (tn
,tv
; tag_
.attr
) startTag
.attr
[tn
]=tv
;
2100 // Handle the pretend start tag
2102 auto parser
= new ElementParser(startTag
,&s2
);
2103 auto handler1
= startTag
.name
in onStartTag
;
2104 if (handler1
!is null) (*handler1
)(parser
);
2107 handler1
= null in onStartTag
;
2108 if (handler1
!is null) (*handler1
)(parser
);
2111 // Handle the pretend end tag
2112 auto element
= new Element(startTag
);
2113 auto handler2
= tag_
.name
in onEndTag
;
2114 if (handler2
!is null) (*handler2
)(element
);
2117 handler2
= null in onEndTag
;
2118 if (handler2
!is null) (*handler2
)(element
);
2124 t
= chop(*s
,indexOf(*s
,"<"));
2125 if (rawTextHandler
.funcptr
!is null)
2127 else if (textHandler
.funcptr
!is null)
2128 textHandler(decode(t
,DecodeMode
.LOOSE
));
2134 * Returns that part of the element which has already been parsed
override string toString() const @nogc @safe pure nothrow
{
    assert(elementStart.length >= s.length);

    // The result is elementStart with a tail of s.length characters cut off.
    immutable parsedLength = elementStart.length - s.length;
    return elementStart[0 .. parsedLength];
}
2146 template Check(string msg
)
2150 void fail() @safe pure
2153 throw new Err(s
,msg
);
2156 void fail(Err e
) @safe pure
2159 throw new Err(s
,msg
,e
);
2162 void fail(string msg2
) @safe pure
2164 fail(new Err(s
,msg2
));
2168 void checkMisc(ref string s
) @safe pure // rule 27
2170 import std
.algorithm
.searching
: startsWith
;
2172 mixin Check
!("Misc");
2176 if (s
.startsWith("<!--")) { checkComment(s
); }
2177 else if (s
.startsWith("<?")) { checkPI(s
); }
2178 else { checkSpace(s
); }
2180 catch (Err e
) { fail(e
); }
2183 void checkDocument(ref string s
) @safe pure // rule 1
2185 mixin Check
!("Document");
2190 star
!(checkMisc
)(s
);
2192 catch (Err e
) { fail(e
); }
2195 void checkChars(ref string s
) @safe pure // rule 2
2197 // TO DO - Fix std.utf stride and decode functions, then use those
2199 import std
.format
: format
;
2201 mixin Check
!("Chars");
2205 foreach (int i
,dchar d
; s
)
2217 fail(format("invalid character: U+%04X",c
));
2221 void checkSpace(ref string s
) @safe pure // rule 3
2223 import std
.algorithm
.searching
: countUntil
;
2224 import std
.ascii
: isWhite
;
2225 import std
.utf
: byCodeUnit
;
2227 mixin Check
!("Whitespace");
2228 ptrdiff_t i
= s
.byCodeUnit
.countUntil
!(a
=> !isWhite(a
));
2229 if (i
== -1 && s
.length
> 0 && isWhite(s
[0]))
2233 if (s
is old
) fail();
2236 void checkName(ref string s
, out string name
) @safe pure // rule 5
2238 mixin Check
!("Name");
2240 if (s
.length
== 0) fail();
2242 foreach (int i
,dchar c
;s
)
2244 if (c
== '_' || c
== ':' ||
isLetter(c
)) continue;
2246 if (c
== '-' || c
== '.' ||
isDigit(c
)
2247 ||
isCombiningChar(c
) ||
isExtender(c
)) continue;
2255 void checkAttValue(ref string s
) @safe pure // rule 10
2257 import std
.algorithm
.searching
: countUntil
;
2258 import std
.utf
: byCodeUnit
;
2260 mixin Check
!("AttValue");
2262 if (s
.length
== 0) fail();
2264 if (c
!= '\u0022' && c
!= '\u0027')
2265 fail("attribute value requires quotes");
2269 s
= s
[s
.byCodeUnit
.countUntil(c
) .. $];
2270 if (s
.length
== 0) fail("unterminated attribute value");
2271 if (s
[0] == '<') fail("< found in attribute value");
2272 if (s
[0] == c
) break;
2273 try { checkReference(s
); } catch (Err e
) { fail(e
); }
2278 void checkCharData(ref string s
) @safe pure // rule 14
2280 import std
.algorithm
.searching
: startsWith
;
2282 mixin Check
!("CharData");
2284 while (s
.length
!= 0)
2286 if (s
.startsWith("&")) break;
2287 if (s
.startsWith("<")) break;
2288 if (s
.startsWith("]]>")) fail("]]> found within char data");
2293 void checkComment(ref string s
) @safe pure // rule 15
2295 import std
.string
: indexOf
;
2297 mixin Check
!("Comment");
2299 try { checkLiteral("<!--",s
); } catch (Err e
) { fail(e
); }
2300 ptrdiff_t n
= s
.indexOf("--");
2301 if (n
== -1) fail("unterminated comment");
2303 try { checkLiteral("-->",s
); } catch (Err e
) { fail(e
); }
2306 void checkPI(ref string s
) @safe pure // rule 16
2312 checkLiteral("<?",s
);
2315 catch (Err e
) { fail(e
); }
2318 void checkCDSect(ref string s
) @safe pure // rule 18
2320 mixin Check
!("CDSect");
2324 checkLiteral(cdata
,s
);
2327 catch (Err e
) { fail(e
); }
2330 void checkProlog(ref string s
) @safe pure // rule 22
2332 mixin Check
!("Prolog");
2336 /* The XML declaration is optional
2337 * http://www.w3.org/TR/2008/REC-xml-20081126/#NT-prolog
2339 opt
!(checkXMLDecl
)(s
);
2341 star
!(checkMisc
)(s
);
2342 opt
!(seq
!(checkDocTypeDecl
,star
!(checkMisc
)))(s
);
2344 catch (Err e
) { fail(e
); }
2347 void checkXMLDecl(ref string s
) @safe pure // rule 23
2349 mixin Check
!("XMLDecl");
2353 checkLiteral("<?xml",s
);
2354 checkVersionInfo(s
);
2355 opt
!(checkEncodingDecl
)(s
);
2356 opt
!(checkSDDecl
)(s
);
2357 opt
!(checkSpace
)(s
);
2358 checkLiteral("?>",s
);
2360 catch (Err e
) { fail(e
); }
2363 void checkVersionInfo(ref string s
) @safe pure // rule 24
2365 mixin Check
!("VersionInfo");
2370 checkLiteral("version",s
);
2372 quoted
!(checkVersionNum
)(s
);
2374 catch (Err e
) { fail(e
); }
2377 void checkEq(ref string s
) @safe pure // rule 25
2383 opt
!(checkSpace
)(s
);
2384 checkLiteral("=",s
);
2385 opt
!(checkSpace
)(s
);
2387 catch (Err e
) { fail(e
); }
2390 void checkVersionNum(ref string s
) @safe pure // rule 26
2392 import std
.algorithm
.searching
: countUntil
;
2393 import std
.utf
: byCodeUnit
;
2395 mixin Check
!("VersionNum");
2397 s
= s
[s
.byCodeUnit
.countUntil('\"') .. $];
2398 if (s
is old
) fail();
2401 void checkDocTypeDecl(ref string s
) @safe pure // rule 28
2403 mixin Check
!("DocTypeDecl");
2407 checkLiteral("<!DOCTYPE",s
);
2409 // TO DO -- ensure DOCTYPE is well formed
2410 // (But not yet. That's one of our "future directions")
2414 catch (Err e
) { fail(e
); }
2417 void checkSDDecl(ref string s
) @safe pure // rule 32
2419 import std
.algorithm
.searching
: startsWith
;
2421 mixin Check
!("SDDecl");
2426 checkLiteral("standalone",s
);
2429 catch (Err e
) { fail(e
); }
2432 if (s
.startsWith("'yes'") || s
.startsWith("\"yes\"")) n
= 5;
2433 else if (s
.startsWith("'no'" ) || s
.startsWith("\"no\"" )) n
= 4;
2434 else fail("standalone attribute value must be 'yes', \"yes\","~
2439 void checkElement(ref string s
) @safe pure // rule 39
2441 mixin Check
!("Element");
2443 string sname
,ename
,t
;
2444 try { checkTag(s
,t
,sname
); } catch (Err e
) { fail(e
); }
2454 catch (Err e
) { fail(e
); }
2459 fail("end tag name \"" ~ ename
2460 ~ "\" differs from start tag name \""~sname
~"\"");
2466 void checkTag(ref string s
, out string type
, out string name
) @safe pure
2468 mixin Check
!("Tag");
2473 checkLiteral("<",s
);
2475 star
!(seq
!(checkSpace
,checkAttribute
))(s
);
2476 opt
!(checkSpace
)(s
);
2477 if (s
.length
!= 0 && s
[0] == '/')
2482 checkLiteral(">",s
);
2484 catch (Err e
) { fail(e
); }
2487 void checkAttribute(ref string s
) @safe pure // rule 41
2489 mixin Check
!("Attribute");
2498 catch (Err e
) { fail(e
); }
2501 void checkETag(ref string s
, out string name
) @safe pure // rule 42
2503 mixin Check
!("ETag");
2507 checkLiteral("</",s
);
2509 opt
!(checkSpace
)(s
);
2510 checkLiteral(">",s
);
2512 catch (Err e
) { fail(e
); }
2515 void checkContent(ref string s
) @safe pure // rule 43
2517 import std
.algorithm
.searching
: startsWith
;
2519 mixin Check
!("Content");
2523 while (s
.length
!= 0)
2526 if (s
.startsWith("&")) { checkReference(s
); }
2527 else if (s
.startsWith("<!--")) { checkComment(s
); }
2528 else if (s
.startsWith("<?")) { checkPI(s
); }
2529 else if (s
.startsWith(cdata
)) { checkCDSect(s
); }
2530 else if (s
.startsWith("</")) { break; }
2531 else if (s
.startsWith("<")) { checkElement(s
); }
2532 else { checkCharData(s
); }
2535 catch (Err e
) { fail(e
); }
2538 void checkCharRef(ref string s
, out dchar c
) @safe pure // rule 66
2540 import std
.format
: format
;
2542 mixin Check
!("CharRef");
2545 try { checkLiteral("&#",s
); } catch (Err e
) { fail(e
); }
2547 if (s
.length
!= 0 && s
[0] == 'x')
2552 if (s
.length
== 0) fail("unterminated character reference");
2554 fail("character reference must have at least one digit");
2555 while (s
.length
!= 0)
2557 immutable char d
= s
[0];
2561 case 'F','f': ++n
; goto case;
2562 case 'E','e': ++n
; goto case;
2563 case 'D','d': ++n
; goto case;
2564 case 'C','c': ++n
; goto case;
2565 case 'B','b': ++n
; goto case;
2566 case 'A','a': ++n
; goto case;
2567 case '9': ++n
; goto case;
2568 case '8': ++n
; goto case;
2569 case '7': ++n
; goto case;
2570 case '6': ++n
; goto case;
2571 case '5': ++n
; goto case;
2572 case '4': ++n
; goto case;
2573 case '3': ++n
; goto case;
2574 case '2': ++n
; goto case;
2575 case '1': ++n
; goto case;
2577 default: n
= 100; break;
2579 if (n
>= radix
) break;
2584 if (!isChar(c
)) fail(format("U+%04X is not a legal character",c
));
2585 if (s
.length
== 0 || s
[0] != ';') fail("expected ;");
2589 void checkReference(ref string s
) @safe pure // rule 67
2591 import std
.algorithm
.searching
: startsWith
;
2593 mixin Check
!("Reference");
2598 if (s
.startsWith("&#")) checkCharRef(s
,c
);
2599 else checkEntityRef(s
);
2601 catch (Err e
) { fail(e
); }
2604 void checkEntityRef(ref string s
) @safe pure // rule 68
2606 mixin Check
!("EntityRef");
2611 checkLiteral("&",s
);
2613 checkLiteral(";",s
);
2615 catch (Err e
) { fail(e
); }
2618 void checkEncName(ref string s
) @safe pure // rule 81
2620 import std
.algorithm
.searching
: countUntil
;
2621 import std
.ascii
: isAlpha
;
2622 import std
.utf
: byCodeUnit
;
2624 mixin Check
!("EncName");
2626 s
= s
[s
.byCodeUnit
.countUntil
!(a
=> !isAlpha(a
)) .. $];
2627 if (s
is old
) fail();
2628 s
= s
[s
.byCodeUnit
.countUntil('\"', '\'') .. $];
2631 void checkEncodingDecl(ref string s
) @safe pure // rule 80
2633 mixin Check
!("EncodingDecl");
2638 checkLiteral("encoding",s
);
2640 quoted
!(checkEncName
)(s
);
2642 catch (Err e
) { fail(e
); }
2647 void checkLiteral(string literal
,ref string s
) @safe pure
2649 import std
.string
: startsWith
;
2651 mixin Check
!("Literal");
2653 if (!s
.startsWith(literal
)) fail("Expected literal \""~literal
~"\"");
2654 s
= s
[literal
.length
..$];
2657 void checkEnd(string end
,ref string s
) @safe pure
2659 import std
.string
: indexOf
;
2660 // Deliberately no mixin Check here.
2662 auto n
= s
.indexOf(end
);
2663 if (n
== -1) throw new Err(s
,"Unable to find terminating \""~end
~"\"");
2665 checkLiteral(end
,s
);
2668 // Metafunctions -- none of these use mixin Check
2670 void opt(alias f
)(ref string s
)
2672 try { f(s
); } catch (Err e
) {}
2675 void plus(alias f
)(ref string s
)
2681 void star(alias f
)(ref string s
)
2683 while (s
.length
!= 0)
2686 catch (Err e
) { return; }
2690 void quoted(alias f
)(ref string s
)
2692 import std
.string
: startsWith
;
2694 if (s
.startsWith("'"))
2696 checkLiteral("'",s
);
2698 checkLiteral("'",s
);
2702 checkLiteral("\"",s
);
2704 checkLiteral("\"",s
);
2708 void seq(alias f
,alias g
)(ref string s
)
2716 * Check an entire XML document for well-formedness
2719 * s = the document to be checked, passed as a string
2721 * Throws: CheckException if the document is not well formed
2723 * CheckException's toString() method will yield the complete hierarchy of
2724 * parse failure (the XML equivalent of a stack trace), giving the line and
2725 * column number of every failure at every level.
2727 void check(string s
) @safe pure
2733 if (s
.length
!= 0) throw new Err(s
,"Junk found after document");
2742 @system pure unittest
2744 import std
.string
: indexOf
;
2748 check(q
"[<?xml version="1.0"?>
2751 <author>Gambardella, Matthew</author>
2752 <title>XML Developer's Guide</title>
2753 <genre>Computer</genre>
2754 <price>44.95</price>
2755 <publish_date>2000-10-01</publish_date>
2756 <description>An in-depth look at creating applications
2757 with XML.</description>
2760 <author>Ralls, Kim</author>
2761 <title>Midnight Rain</title>
2762 <genre>Fantasy</genres>
2764 <publish_date>2000-12-16</publish_date>
2765 <description>A former architect battles corporate zombies,
2766 an evil sorceress, and her own childhood to become queen
2767 of the world.</description>
2770 <author>Corets, Eva</author>
2771 <title>Maeve Ascendant</title>
2772 <genre>Fantasy</genre>
2774 <publish_date>2000-11-17</publish_date>
2775 <description>After the collapse of a nanotechnology
2776 society in England, the young survivors lay the
2777 foundation for a new society.</description>
2783 catch (CheckException e
)
2785 auto n
= e
.toString().indexOf("end tag name \"genres\" differs"~
2786 " from start tag name \"genre\"");
2794 <?xml version="1.0"?>
2805 catch (CheckException e
)
2807 assert(0, e
.toString());
2813 string test_xml
= `<?xml version="1.0" encoding='UTF-8'?><r><stream:stream
2814 xmlns:stream="http://etherx.'jabber'.org/streams"
2815 xmlns="jabber:'client'" from='jid.pl' id="587a5767"
2816 xml:lang="en" version="1.0" attr='a"b"c'>
2817 </stream:stream></r>`;
2819 DocumentParser parser
= new DocumentParser(test_xml
);
2820 bool tested
= false;
2821 parser
.onStartTag
["stream:stream"] = (ElementParser p
) {
2822 assert(p
.tag
.attr
["xmlns"] == "jabber:'client'");
2823 assert(p
.tag
.attr
["from"] == "jid.pl");
2824 assert(p
.tag
.attr
["attr"] == "a\"b\"c");
2834 <?xml version="1.0" encoding="utf
-8"?> <Tests>
2835 <Test thing="What
&
; Up
">What & Up Second</Test>
2838 auto xml
= new DocumentParser(s
);
2840 xml
.onStartTag
["Test"] = (ElementParser xml
) {
2841 assert(xml
.tag
.attr
["thing"] == "What & Up");
2844 xml
.onEndTag
["Test"] = (in Element e
) {
2845 assert(e
.text() == "What & Up Second");
2852 string s
= `<tag attr=""value>" />`;
2853 auto doc
= new Document(s
);
2854 assert(doc
.toString() == s
);
/// Common base class for the exceptions thrown by this module.
class XMLException : Exception
{
    /// Creates the exception carrying `msg` as its message.
    this(string msg) @safe pure
    {
        super(msg);
    }
}
/// Thrown by the Comment constructor.
class CommentException : XMLException
{
    // private constructor: instances are created only from inside this module
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
/// Thrown by the CData constructor.
class CDataException : XMLException
{
    // private constructor: instances are created only from inside this module
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
/// Thrown by the XMLInstruction constructor when the body contains ">".
class XIException : XMLException
{
    // private constructor: instances are created only from inside this module
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
/// Thrown by the ProcessingInstruction constructor when the body contains "?>".
class PIException : XMLException
{
    // private constructor: instances are created only from inside this module
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
/// Thrown by the Text constructor.
class TextException : XMLException
{
    // private constructor: instances are created only from inside this module
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
/// Thrown by decode().
class DecodeException : XMLException
{
    // private constructor: instances are created only from inside this module
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
/// Thrown when an item is compared with an object of the wrong type.
class InvalidTypeException : XMLException
{
    // private constructor: instances are created only from inside this module
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
/// Thrown while parsing Tags.
class TagException : XMLException
{
    // private constructor: instances are created only from inside this module
    private this(string msg) @safe pure
    {
        super(msg);
    }
}
2895 * Thrown during check()
2897 class CheckException
: XMLException
2899 CheckException err
; /// Parent in hierarchy
2900 private string tail
;
2902 * Name of production rule which failed to parse,
2903 * or specific error message
2906 size_t line
= 0; /// Line number at which parse failure occurred
2907 size_t column
= 0; /// Column number at which parse failure occurred
2909 private this(string tail
,string msg
,Err err
=null) @safe pure
2917 private void complete(string entire
) @safe pure
2919 import std
.string
: count
, lastIndexOf
;
2920 import std
.utf
: toUTF32
;
2922 string head
= entire
[0..$-tail
.length
];
2923 ptrdiff_t n
= head
.lastIndexOf('\n') + 1;
2924 line
= head
.count("\n") + 1;
2925 dstring t
= toUTF32(head
[n
..$]);
2926 column
= t
.length
+ 1;
2927 if (err
!is null) err
.complete(entire
);
2930 override string
toString() const @safe pure
2932 import std
.format
: format
;
2935 if (line
!= 0) s
= format("Line %d, column %d: ",line
,column
);
2938 if (err
!is null) s
= err
.toString() ~ s
;
2943 private alias Err
= CheckException
;
2945 // Private helper functions
2949 inout(T
) toType(T
)(inout Object o
)
2954 throw new InvalidTypeException("Attempt to compare a "
2955 ~ T
.stringof
~ " with an instance of another type");
2960 string
chop(ref string s
, size_t n
) @safe pure nothrow
2962 if (n
== -1) n
= s
.length
;
2963 string t
= s
[0 .. n
];
2968 bool optc(ref string s
, char c
) @safe pure nothrow
2970 immutable bool b
= s
.length
!= 0 && s
[0] == c
;
2975 void reqc(ref string s
, char c
) @safe pure
2977 if (s
.length
== 0 || s
[0] != c
) throw new TagException("");
2981 char requireOneOf(ref string s
, string chars
) @safe pure
2983 import std
.string
: indexOf
;
2985 if (s
.length
== 0 ||
indexOf(chars
,s
[0]) == -1)
2986 throw new TagException("");
2987 immutable char ch
= s
[0];
size_t hash(string s, size_t h = 0) @trusted nothrow
{
    // Hash of the string as computed by its TypeInfo, offset by the caller's
    // seed h (0 by default).
    // NOTE(review): @trusted is presumably needed for taking &s -- confirm.
    immutable digest = typeid(s).getHash(&s);
    return digest + h;
}
2997 // Definitions from the XML specification
2998 immutable CharTable
=[0x9,0x9,0xA,0xA,0xD,0xD,0x20,0xD7FF,0xE000,0xFFFD,
// BaseCharTable: flat array of code-point values stored as consecutive
// [first, last] pairs; a single code point is written as a pair with equal
// ends (e.g. 0x0386,0x0386). The entries are in ascending order, which the
// halving search in lookup() relies on.
// Presumably mirrors the `BaseChar` production of the XML specification --
// confirm against the spec before editing any value.
3000 immutable BaseCharTable
=[0x0041,0x005A,0x0061,0x007A,0x00C0,0x00D6,0x00D8,
3001 0x00F6,0x00F8,0x00FF,0x0100,0x0131,0x0134,0x013E,0x0141,0x0148,0x014A,
3002 0x017E,0x0180,0x01C3,0x01CD,0x01F0,0x01F4,0x01F5,0x01FA,0x0217,0x0250,
3003 0x02A8,0x02BB,0x02C1,0x0386,0x0386,0x0388,0x038A,0x038C,0x038C,0x038E,
3004 0x03A1,0x03A3,0x03CE,0x03D0,0x03D6,0x03DA,0x03DA,0x03DC,0x03DC,0x03DE,
3005 0x03DE,0x03E0,0x03E0,0x03E2,0x03F3,0x0401,0x040C,0x040E,0x044F,0x0451,
3006 0x045C,0x045E,0x0481,0x0490,0x04C4,0x04C7,0x04C8,0x04CB,0x04CC,0x04D0,
3007 0x04EB,0x04EE,0x04F5,0x04F8,0x04F9,0x0531,0x0556,0x0559,0x0559,0x0561,
3008 0x0586,0x05D0,0x05EA,0x05F0,0x05F2,0x0621,0x063A,0x0641,0x064A,0x0671,
3009 0x06B7,0x06BA,0x06BE,0x06C0,0x06CE,0x06D0,0x06D3,0x06D5,0x06D5,0x06E5,
3010 0x06E6,0x0905,0x0939,0x093D,0x093D,0x0958,0x0961,0x0985,0x098C,0x098F,
3011 0x0990,0x0993,0x09A8,0x09AA,0x09B0,0x09B2,0x09B2,0x09B6,0x09B9,0x09DC,
3012 0x09DD,0x09DF,0x09E1,0x09F0,0x09F1,0x0A05,0x0A0A,0x0A0F,0x0A10,0x0A13,
3013 0x0A28,0x0A2A,0x0A30,0x0A32,0x0A33,0x0A35,0x0A36,0x0A38,0x0A39,0x0A59,
3014 0x0A5C,0x0A5E,0x0A5E,0x0A72,0x0A74,0x0A85,0x0A8B,0x0A8D,0x0A8D,0x0A8F,
3015 0x0A91,0x0A93,0x0AA8,0x0AAA,0x0AB0,0x0AB2,0x0AB3,0x0AB5,0x0AB9,0x0ABD,
3016 0x0ABD,0x0AE0,0x0AE0,0x0B05,0x0B0C,0x0B0F,0x0B10,0x0B13,0x0B28,0x0B2A,
3017 0x0B30,0x0B32,0x0B33,0x0B36,0x0B39,0x0B3D,0x0B3D,0x0B5C,0x0B5D,0x0B5F,
3018 0x0B61,0x0B85,0x0B8A,0x0B8E,0x0B90,0x0B92,0x0B95,0x0B99,0x0B9A,0x0B9C,
3019 0x0B9C,0x0B9E,0x0B9F,0x0BA3,0x0BA4,0x0BA8,0x0BAA,0x0BAE,0x0BB5,0x0BB7,
3020 0x0BB9,0x0C05,0x0C0C,0x0C0E,0x0C10,0x0C12,0x0C28,0x0C2A,0x0C33,0x0C35,
3021 0x0C39,0x0C60,0x0C61,0x0C85,0x0C8C,0x0C8E,0x0C90,0x0C92,0x0CA8,0x0CAA,
3022 0x0CB3,0x0CB5,0x0CB9,0x0CDE,0x0CDE,0x0CE0,0x0CE1,0x0D05,0x0D0C,0x0D0E,
3023 0x0D10,0x0D12,0x0D28,0x0D2A,0x0D39,0x0D60,0x0D61,0x0E01,0x0E2E,0x0E30,
3024 0x0E30,0x0E32,0x0E33,0x0E40,0x0E45,0x0E81,0x0E82,0x0E84,0x0E84,0x0E87,
3025 0x0E88,0x0E8A,0x0E8A,0x0E8D,0x0E8D,0x0E94,0x0E97,0x0E99,0x0E9F,0x0EA1,
3026 0x0EA3,0x0EA5,0x0EA5,0x0EA7,0x0EA7,0x0EAA,0x0EAB,0x0EAD,0x0EAE,0x0EB0,
3027 0x0EB0,0x0EB2,0x0EB3,0x0EBD,0x0EBD,0x0EC0,0x0EC4,0x0F40,0x0F47,0x0F49,
3028 0x0F69,0x10A0,0x10C5,0x10D0,0x10F6,0x1100,0x1100,0x1102,0x1103,0x1105,
3029 0x1107,0x1109,0x1109,0x110B,0x110C,0x110E,0x1112,0x113C,0x113C,0x113E,
3030 0x113E,0x1140,0x1140,0x114C,0x114C,0x114E,0x114E,0x1150,0x1150,0x1154,
3031 0x1155,0x1159,0x1159,0x115F,0x1161,0x1163,0x1163,0x1165,0x1165,0x1167,
3032 0x1167,0x1169,0x1169,0x116D,0x116E,0x1172,0x1173,0x1175,0x1175,0x119E,
3033 0x119E,0x11A8,0x11A8,0x11AB,0x11AB,0x11AE,0x11AF,0x11B7,0x11B8,0x11BA,
3034 0x11BA,0x11BC,0x11C2,0x11EB,0x11EB,0x11F0,0x11F0,0x11F9,0x11F9,0x1E00,
3035 0x1E9B,0x1EA0,0x1EF9,0x1F00,0x1F15,0x1F18,0x1F1D,0x1F20,0x1F45,0x1F48,
3036 0x1F4D,0x1F50,0x1F57,0x1F59,0x1F59,0x1F5B,0x1F5B,0x1F5D,0x1F5D,0x1F5F,
3037 0x1F7D,0x1F80,0x1FB4,0x1FB6,0x1FBC,0x1FBE,0x1FBE,0x1FC2,0x1FC4,0x1FC6,
3038 0x1FCC,0x1FD0,0x1FD3,0x1FD6,0x1FDB,0x1FE0,0x1FEC,0x1FF2,0x1FF4,0x1FF6,
3039 0x1FFC,0x2126,0x2126,0x212A,0x212B,0x212E,0x212E,0x2180,0x2182,0x3041,
3040 0x3094,0x30A1,0x30FA,0x3105,0x312C,0xAC00,0xD7A3];
// IdeographicTable: code-point values stored as consecutive [first, last]
// pairs, one pair per line, in ascending order. Presumably mirrors the
// `Ideographic` production of the XML specification -- confirm against the spec.
immutable IdeographicTable = [
    0x3007, 0x3007,
    0x3021, 0x3029,
    0x4E00, 0x9FA5,
];
// CombiningCharTable: flat array of code-point values stored as consecutive
// [first, last] pairs; a single code point is written as a pair with equal
// ends (e.g. 0x05BF,0x05BF). The entries are in ascending order, which the
// halving search in lookup() relies on.
// Presumably mirrors the `CombiningChar` production of the XML specification --
// confirm against the spec before editing any value.
3042 immutable CombiningCharTable
=[0x0300,0x0345,0x0360,0x0361,0x0483,0x0486,
3043 0x0591,0x05A1,0x05A3,0x05B9,0x05BB,0x05BD,0x05BF,0x05BF,0x05C1,0x05C2,
3044 0x05C4,0x05C4,0x064B,0x0652,0x0670,0x0670,0x06D6,0x06DC,0x06DD,0x06DF,
3045 0x06E0,0x06E4,0x06E7,0x06E8,0x06EA,0x06ED,0x0901,0x0903,0x093C,0x093C,
3046 0x093E,0x094C,0x094D,0x094D,0x0951,0x0954,0x0962,0x0963,0x0981,0x0983,
3047 0x09BC,0x09BC,0x09BE,0x09BE,0x09BF,0x09BF,0x09C0,0x09C4,0x09C7,0x09C8,
3048 0x09CB,0x09CD,0x09D7,0x09D7,0x09E2,0x09E3,0x0A02,0x0A02,0x0A3C,0x0A3C,
3049 0x0A3E,0x0A3E,0x0A3F,0x0A3F,0x0A40,0x0A42,0x0A47,0x0A48,0x0A4B,0x0A4D,
3050 0x0A70,0x0A71,0x0A81,0x0A83,0x0ABC,0x0ABC,0x0ABE,0x0AC5,0x0AC7,0x0AC9,
3051 0x0ACB,0x0ACD,0x0B01,0x0B03,0x0B3C,0x0B3C,0x0B3E,0x0B43,0x0B47,0x0B48,
3052 0x0B4B,0x0B4D,0x0B56,0x0B57,0x0B82,0x0B83,0x0BBE,0x0BC2,0x0BC6,0x0BC8,
3053 0x0BCA,0x0BCD,0x0BD7,0x0BD7,0x0C01,0x0C03,0x0C3E,0x0C44,0x0C46,0x0C48,
3054 0x0C4A,0x0C4D,0x0C55,0x0C56,0x0C82,0x0C83,0x0CBE,0x0CC4,0x0CC6,0x0CC8,
3055 0x0CCA,0x0CCD,0x0CD5,0x0CD6,0x0D02,0x0D03,0x0D3E,0x0D43,0x0D46,0x0D48,
3056 0x0D4A,0x0D4D,0x0D57,0x0D57,0x0E31,0x0E31,0x0E34,0x0E3A,0x0E47,0x0E4E,
3057 0x0EB1,0x0EB1,0x0EB4,0x0EB9,0x0EBB,0x0EBC,0x0EC8,0x0ECD,0x0F18,0x0F19,
3058 0x0F35,0x0F35,0x0F37,0x0F37,0x0F39,0x0F39,0x0F3E,0x0F3E,0x0F3F,0x0F3F,
3059 0x0F71,0x0F84,0x0F86,0x0F8B,0x0F90,0x0F95,0x0F97,0x0F97,0x0F99,0x0FAD,
3060 0x0FB1,0x0FB7,0x0FB9,0x0FB9,0x20D0,0x20DC,0x20E1,0x20E1,0x302A,0x302F,
3061 0x3099,0x3099,0x309A,0x309A];
// DigitTable: code-point values stored as consecutive [first, last] pairs,
// one pair per line, in ascending order. Presumably mirrors the `Digit`
// production of the XML specification -- confirm against the spec.
immutable DigitTable = [
    0x0030, 0x0039,
    0x0660, 0x0669,
    0x06F0, 0x06F9,
    0x0966, 0x096F,
    0x09E6, 0x09EF,
    0x0A66, 0x0A6F,
    0x0AE6, 0x0AEF,
    0x0B66, 0x0B6F,
    0x0BE7, 0x0BEF,
    0x0C66, 0x0C6F,
    0x0CE6, 0x0CEF,
    0x0D66, 0x0D6F,
    0x0E50, 0x0E59,
    0x0ED0, 0x0ED9,
    0x0F20, 0x0F29,
];
// ExtenderTable: code-point values stored as consecutive [first, last] pairs,
// one pair per line, in ascending order; single code points appear as a pair
// with equal ends. Presumably mirrors the `Extender` production of the XML
// specification -- confirm against the spec.
immutable ExtenderTable = [
    0x00B7, 0x00B7,
    0x02D0, 0x02D0,
    0x02D1, 0x02D1,
    0x0387, 0x0387,
    0x0640, 0x0640,
    0x0E46, 0x0E46,
    0x0EC6, 0x0EC6,
    0x3005, 0x3005,
    0x3031, 0x3035,
    0x309D, 0x309E,
    0x30FC, 0x30FE,
];
// Searches `table` for the value `c` by repeatedly narrowing the slice.
//
// Params:
//   table = flat array examined two entries at a time (table[m], table[m+1]);
//           the halving only works if the entries are in ascending order
//   c     = value to search for
//
// NOTE(review): the condition of the first branch and the return statements
// are not present in this listing. Presumably the first branch is taken when
// `c` lies below table[m], and the function reports true when `c` falls
// inside a pair and false once the slice is empty -- confirm against the
// complete source.
3070 bool lookup(const(int)[] table
, int c
) @safe @nogc nothrow pure
// Keep going while there is still a candidate slice to examine.
3072 while (table
.length
!= 0)
// Middle index with its lowest bit cleared, so m is always even and
// table[m], table[m+1] are the two entries of one pair.
3074 auto m
= (table
.length
>> 1) & ~1;
// Keep only the entries before the middle pair.
3077 table
= table
[0 .. m
];
// c is beyond the second entry of the middle pair: keep only what follows it.
3079 else if (c
> table
[m
+1])
3081 table
= table
[m
+2..$];
// Builds a short, printable rendering of `s`.
//
// Each `c` appended to `r` is replaced by '.' when it is below 0x20 or above
// 0x7F. As soon as `r` holds 40 or more characters, "___" is appended and the
// loop is left, so longer input is cut short.
//
// NOTE(review): the declaration of `r`, the loop header that yields `c`, and
// the return statement are not present in this listing. Presumably `c` walks
// over the characters of `s` and `r` is the returned string -- confirm against
// the complete source.
3088 string
startOf(string s
) @safe nothrow pure
// Values outside 0x20 .. 0x7F are replaced with '.'.
3093 r
~= (c
< 0x20 || c
> 0x7F) ?
'.' : c
;
// Cap the output: mark the cut with "___" and stop.
3094 if (r
.length
>= 40) { r
~= "___"; break; }
// Aborts the current operation by throwing an XMLException.
//
// Params:
//   s = message passed to the XMLException constructor (defaults to null)
// Throws: XMLException, always.
void exit(string s = null)
{
    auto failure = new XMLException(s);
    throw failure;
}