saxy.d

   1 /* Invisible Vector Library
   2  * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
   3  * Understanding is not required. Only obedience.
   4  *
   5  * This software is provided 'as-is', without any express or implied
   6  * warranty.  In no event will the authors be held liable for any damages
   7  * arising from the use of this software.
   8  * Permission is granted to anyone to use this software for any purpose,
   9  * including commercial applications, and to alter it and redistribute it
  10  * freely, subject to the following restrictions:
  11  * 1. The origin of this software must not be misrepresented; you must not
  12  *    claim that you wrote the original software. If you use this software
  13  *    in a product, an acknowledgment in the product documentation would be
  14  *    appreciated but is not required.
  15  * 2. Altered source versions must be plainly marked as such, and must not be
  16  *    misrepresented as being the original software.
  17  * 3. This notice may not be removed or altered from any source distribution.
  18  *
  19  * This program is distributed in the hope that it will be useful,
  20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  22  */
  23 // SAX style xml parser
  24 module iv.saxy /*is aliced*/;
  25
  26 import std.encoding;
  27 import std.range;
  28
  29 import iv.alice;
  30 import iv.strex;
  31 import iv.vfs;
  32
  33
  34 // ////////////////////////////////////////////////////////////////////////// //
  35 //*WARNING*: attr keys are *NOT* strings!
  36 void xmparse(ST) (auto ref ST fl,
  37   scope void delegate (char[] name, char[][string] attrs) tagStart,
  38   scope void delegate (char[] name) tagEnd,
  39   scope void delegate (char[] text) content,
  40 ) if (isReadableStream!ST || (isInputRange!ST && is(ElementEncodingType!ST == char))) {
  41   char[] buf;
  42   uint bufpos;
  43   char[][string] attrs;
  44   scope(exit) {
  45     attrs.destroy;
  46     buf.destroy;
  47   }
  48
  49   static bool isValidNameChar() (char ch) {
  50     pragma(inline, true);
  51     return
  52       (ch >= '0' && ch <= '9') ||
  53       (ch >= 'A' && ch <= 'Z') ||
  54       (ch >= 'a' && ch <= 'z') ||
  55       ch == '_' || ch == '-' || ch == ':';
  56   }
  57
  58   int tagLevel = 0;
  59
  60   void bufPut (const(char)[] chars...) {
  61     if (/*tagLevel &&*/ chars.length) {
  62       if (chars.length+bufpos > buf.length) {
  63         if (chars.length+bufpos >= int.max) throw new Exception("out of memory in xml parser");
  64         buf.assumeSafeAppend;
  65         buf.length = ((chars.length+bufpos)|0x3ff)+1;
  66       }
  67       buf[bufpos..bufpos+chars.length] = chars[];
  68       bufpos += chars.length;
  69     }
  70   }
  71
  72   void clearBuf () {
  73     bufpos = 0;
  74   }
  75
  76   char curCh;
  77   bool eof;
  78
  79   static if (isReadableStream!ST) {
  80     char[] rdbuf;
  81     scope(exit) rdbuf.destroy;
  82     uint rdbufpos, rdbufused;
  83   }
  84
  85   void skipChar () {
  86     if (!eof) {
  87       static if (isReadableStream!ST) {
  88         // buffer more bytes
  89         if (rdbufpos >= rdbufused) {
  90           if (rdbuf.length == 0) rdbuf.length = 32*1024;
  91           auto rd = fl.rawRead(rdbuf[]);
  92           if (rd.length == 0) { eof = true; curCh = 0; return; }
  93           rdbufpos = 0;
  94           rdbufused = cast(uint)rd.length;
  95         }
  96         curCh = rdbuf.ptr[rdbufpos++];
  97       } else {
  98         if (fl.empty) { eof = true; curCh = 0; return; }
  99         curCh = fl.front;
 100         fl.popFront;
 101       }
 102       if (curCh == 0) curCh = ' ';
 103     }
 104   }
 105
 106   // curCh is '&'
 107   void parseEntity (bool inattr) {
 108     assert(curCh == '&');
 109     bufPut(curCh);
 110     auto xpos = bufpos;
 111     skipChar();
 112     if (inattr) {
 113       while (!eof && curCh != '/' && curCh != '>' && curCh != '?' && curCh != ';' && bufpos-xpos < 9) {
 114         bufPut(curCh);
 115         skipChar();
 116       }
 117     } else {
 118       while (!eof && curCh != '<' && curCh != ';' && bufpos-xpos < 9) {
 119         bufPut(curCh);
 120         skipChar();
 121       }
 122     }
 123     if (!eof && curCh == ';' && bufpos > xpos) {
 124       import std.utf : encode, UseReplacementDchar;
 125       char[4] ubuf = void; // utf buffer
 126       switch (buf[xpos..bufpos]) {
 127         case "lt": bufpos = xpos-1; bufPut('<'); break;
 128         case "gt": bufpos = xpos-1; bufPut('>'); break;
 129         case "amp": bufpos = xpos-1; bufPut('&'); break;
 130         case "quot": bufpos = xpos-1; bufPut('"'); break;
 131         case "apos": bufpos = xpos-1; bufPut('\''); break;
 132         default:
 133           bufPut(curCh); // first put ';'
 134           if (bufpos-xpos > 3 && buf.ptr[xpos] == '#' && buf.ptr[xpos+1] == 'x') {
 135             // should be hex code
 136             uint n = 0;
 137             auto pos = xpos+2;
 138             while (pos < bufpos-1) {
 139               char ch = buf.ptr[pos++];
 140                    if (ch >= '0' && ch <= '9') n = n*16+ch-'0';
 141               else if (ch >= 'A' && ch <= 'F') n = n*16+ch-'A'+10;
 142               else if (ch >= 'a' && ch <= 'f') n = n*16+ch-'a'+10;
 143               else { n = uint.max; break; } // invalid digit
 144               if (n > dchar.max) break; // invalid char
 145             }
 146             if (n <= dchar.max) {
 147               if (n == 1) n = 32;
 148               bufpos = xpos-1;
 149               auto sz = encode!(UseReplacementDchar.yes)(ubuf, cast(dchar)n);
 150               foreach (immutable char ch; ubuf[0..sz]) bufPut(ch);
 151             }
 152           } else if (bufpos-xpos > 2 && buf.ptr[xpos] == '#') {
 153             // shoud be decimal code
 154             uint n = 0;
 155             auto pos = xpos+1;
 156             while (pos < bufpos-1) {
 157               char ch = buf.ptr[pos++];
 158               if (ch >= '0' && ch <= '9') n = n*10+ch-'0';
 159               else { n = uint.max; break; } // invalid digit
 160               if (n > dchar.max) break; // invalid char
 161             }
 162             if (n <= dchar.max) {
 163               if (n == 1) n = 32;
 164               bufpos = xpos-1;
 165               auto sz = encode!(UseReplacementDchar.yes)(ubuf, cast(dchar)n);
 166               foreach (immutable char ch; ubuf[0..sz]) bufPut(ch);
 167             }
 168           }
 169           break;
 170       }
 171       skipChar();
 172     }
 173   }
 174
 175   void parseCData () {
 176     clearBuf();
 177     while (!eof) {
 178       if (bufpos >= 3 && buf.ptr[bufpos-1] == '>' && buf.ptr[bufpos-2] == ']' && buf.ptr[bufpos-3] == ']') {
 179         bufpos -= 3;
 180         break;
 181       }
 182       bufPut(curCh);
 183       skipChar();
 184     }
 185     if (tagLevel && bufpos > 0 && content !is null) content(buf[0..bufpos]);
 186     clearBuf();
 187   }
 188
 189   void parseContent () {
 190     clearBuf();
 191     while (!eof) {
 192       if (curCh == '<') break;
 193       if (curCh != '&') {
 194         bufPut(curCh);
 195         skipChar();
 196       } else {
 197         parseEntity(false);
 198       }
 199     }
 200     if (tagLevel && bufpos > 0 && content !is null) content(buf[0..bufpos]);
 201     clearBuf();
 202   }
 203
 204   void parseTag () {
 205     assert(!eof && curCh == '<');
 206     clearBuf();
 207     skipChar();
 208     if (eof) throw new Exception("invalid xml");
 209     bool inlineClose = false, closeTag = false;
 210     if (curCh == '!') {
 211       // either CDATA, or comment-like
 212       skipChar();
 213       if (curCh == '[') {
 214         // this *must* be CDATA
 215         skipChar();
 216         if (curCh != 'C') throw new Exception("invalid xml");
 217         skipChar();
 218         if (curCh != 'D') throw new Exception("invalid xml");
 219         skipChar();
 220         if (curCh != 'A') throw new Exception("invalid xml");
 221         skipChar();
 222         if (curCh != 'T') throw new Exception("invalid xml");
 223         skipChar();
 224         if (curCh != 'A') throw new Exception("invalid xml");
 225         skipChar();
 226         if (curCh != '[') throw new Exception("invalid xml");
 227         skipChar();
 228         clearBuf();
 229         parseCData();
 230         return;
 231       } else if (curCh == '-') {
 232         // comment
 233         skipChar();
 234         if (curCh != '-') throw new Exception("invalid xml");
 235         skipChar();
 236         for (;;) {
 237           if (eof) throw new Exception("invalid xml");
 238           if (curCh == '-') {
 239             skipChar();
 240             if (curCh == '-') {
 241               skipChar();
 242               if (curCh == '>') {
 243                 skipChar();
 244                 break;
 245               }
 246             }
 247           } else {
 248             skipChar();
 249           }
 250         }
 251         clearBuf();
 252         return;
 253       } else {
 254         // !tag
 255         bufPut('!');
 256       }
 257     } else {
 258       if (curCh == '/') { closeTag = true; skipChar(); }
 259       if (curCh == '?') { bufPut(curCh); skipChar(); }
 260     }
 261     if (eof || !isValidNameChar(curCh)) throw new Exception("invalid xml");
 262     while (isValidNameChar(curCh)) {
 263       bufPut(curCh);
 264       skipChar();
 265     }
 266     //{ import std.stdio; writeln("TAG: ", buf[0..bufpos].quote); }
 267     // now parse attributes
 268     scope(exit) attrs.clear();
 269     while (!eof && curCh <= ' ') skipChar();
 270     // closing tag?
 271     auto tagnameend = bufpos;
 272     if (!closeTag) {
 273       // attr=["]name["]
 274       // read the whole tag, so we can add AA items without anchoring stale memory
 275       if (eof) throw new Exception("invalid xml");
 276       if (curCh != '/' && curCh != '>' && curCh != '?') {
 277         bufPut(' ');
 278         auto stpos = bufpos;
 279         char qch = 0;
 280         for (;;) {
 281           if (eof) throw new Exception("invalid xml");
 282           if (qch) {
 283             if (curCh == qch) { qch = 0; curCh = 1; }
 284             if (curCh == '&') {
 285               parseEntity(true);
 286               continue;
 287             }
 288           } else {
 289             if (curCh == '/' || curCh == '>' || curCh == '?') break;
 290             if (curCh == '"' || curCh == '\'') {
 291               qch = curCh;
 292               curCh = 1;
 293             } else if (curCh == 1) {
 294               curCh = 32;
 295             }
 296           }
 297           bufPut(curCh);
 298           skipChar();
 299         }
 300         // now parse attributes
 301         //{ import std.stdio; writeln(": ", buf[stpos..bufpos].quote); }
 302         while (stpos < bufpos) {
 303           while (stpos < bufpos && buf.ptr[stpos] <= ' ' && buf.ptr[stpos] != 1) ++stpos;
 304           if (stpos >= bufpos) break;
 305           //{ import std.stdio; writeln(": ", buf[stpos..bufpos].quote); }
 306           if (!isValidNameChar(buf.ptr[stpos])) throw new Exception("invalid xml: "~buf[stpos..bufpos].quote);
 307           auto nst = stpos;
 308           while (stpos < bufpos && isValidNameChar(buf.ptr[stpos])) ++stpos;
 309           string aname = cast(string)(buf[nst..stpos]); // unsafe cast, but meh...
 310           while (stpos < bufpos && buf.ptr[stpos] <= ' ' && buf.ptr[stpos] != 1) ++stpos;
 311           if (stpos >= bufpos) { attrs[aname] = null; break; } // no value
 312           if (buf.ptr[stpos] != '=') { attrs[aname] = null; continue; } // no value
 313           ++stpos;
 314           while (stpos < bufpos && buf.ptr[stpos] <= ' ' && buf.ptr[stpos] != 1) ++stpos;
 315           if (stpos >= bufpos) { attrs[aname] = buf[bufpos..bufpos]; break; }
 316           //if (buf.ptr[stpos] == '"' || buf.ptr[stpos] == '\'')
 317           if (buf.ptr[stpos] == 1)
 318           {
 319             auto ech = buf.ptr[stpos];
 320             nst = ++stpos;
 321             while (stpos < bufpos && buf.ptr[stpos] != ech) ++stpos;
 322             if (stpos >= bufpos) throw new Exception("invalid xml");
 323             attrs[aname] = buf[nst..stpos];
 324             ++stpos;
 325           } else {
 326             nst = stpos;
 327             while (stpos < bufpos && buf.ptr[stpos] > ' ') ++stpos;
 328             attrs[aname] = buf[nst..stpos];
 329           }
 330         }
 331       }
 332     }
 333     if (curCh == '?') {
 334       if (buf.ptr[0] != '?') throw new Exception("invalid xml");
 335       skipChar();
 336       inlineClose = true;
 337     } else if (buf.ptr[0] != '!') {
 338       if (curCh == '/') { inlineClose = true; skipChar(); }
 339     } else {
 340       inlineClose = true;
 341     }
 342     if (curCh != '>') throw new Exception("invalid xml");
 343     skipChar();
 344     if (closeTag) {
 345       if (inlineClose) throw new Exception("invalid xml");
 346       if (tagEnd !is null) tagEnd(buf[0..tagnameend]);
 347       --tagLevel;
 348     } else {
 349       ++tagLevel;
 350       if (tagStart !is null) tagStart(buf[0..tagnameend], attrs);
 351       if (inlineClose) {
 352         if (tagEnd !is null) tagEnd(buf[0..tagnameend]);
 353         --tagLevel;
 354       }
 355     }
 356   }
 357
 358   while (!eof) {
 359     //writeln("*** ", tagLevel, " ***");
 360     parseContent();
 361     if (eof) break;
 362     if (curCh == '<') {
 363       parseTag();
 364       if (tagLevel < 0) throw new Exception("invalid xml");
 365     }
 366   }
 367
 368   if (tagLevel != 0) throw new Exception("invalid xml");
 369 }
 370
 371
 372 // ////////////////////////////////////////////////////////////////////////// //
  373 // you can use "quantifiers" in paths, like this:
 374 //   "/a/b/c*/d+/*"
 375 // that means "any number of 'c' tags", "one or more 'd' tags", "any number of any tags"
 376 // the last is useful to parse things like "bold" tag inside "p" tag, for example
 377 final class SaxyEx {
 378 private import std.range;
 379 public:
 380   alias TagOpenCB = void delegate (char[] name, char[][string] attrs);
 381   alias TagOpenCBNA = void delegate (char[] name);
 382   alias TagCloseCB = void delegate (char[] name);
 383   alias TagContentCB = void delegate (char[] text);
 384
 385 private:
 386   static struct PathElement {
 387     string name; // empty: any tag
 388     char quant = 0; // '+', '*', 0
 389   }
 390
 391   static struct TagCB {
 392     enum Type { Open, Close, Content }
 393     Type type;
 394     PathElement[] path;
 395     bool pathHasQuants; // use faster algo if there are no quantifiers
 396     bool openNoAttr;
 397     union {
 398       TagOpenCB open;
 399       TagCloseCB close;
 400       TagContentCB content;
 401     }
 402   }
 403
 404 private:
 405   TagCB[] callbacksOpen;
 406   TagCB[] callbacksClose;
 407   TagCB[] callbacksContent;
 408
 409 public:
 410   this () {}
 411
 412   void load (const(char)[] filename) { loadFile(VFile(filename)); }
 413
 414   void loadStream(ST) (auto ref ST st) if (isReadableStream!ST || (isInputRange!ST && is(ElementEncodingType!ST == char))) { loadFile(st); }
 415
 416   void onOpen(ST : const(char)[]) (ST path, TagOpenCB cb) {
 417     assert(cb !is null);
 418     auto tcb = newCallback!"open"(path);
 419     tcb.open = cb;
 420     tcb.openNoAttr = false;
 421   }
 422
 423   void onOpen(ST : const(char)[]) (ST path, TagOpenCBNA cb) {
 424     assert(cb !is null);
 425     auto tcb = newCallback!"open"(path);
 426     tcb.close = cb; // lucky me
 427     tcb.openNoAttr = true;
 428   }
 429
 430   void onClose(ST : const(char)[]) (ST path, TagCloseCB cb) {
 431     assert(cb !is null);
 432     auto tcb = newCallback!"close"(path);
 433     tcb.close = cb;
 434   }
 435
 436   void onContent(ST : const(char)[]) (ST path, TagContentCB cb) {
 437     assert(cb !is null);
 438     auto tcb = newCallback!"content"(path);
 439     tcb.content = cb;
 440   }
 441
 442 private:
 443   TagCB* newCallback(string type, ST : const(char)[]) (ST path) {
 444     static if (is(ST == typeof(null))) {
 445       return newCallback("");
 446     } else {
 447       // parse path
 448       bool hasQuants = false;
 449       PathElement[] pth;
 450       if (path.length) {
 451         while (path.length != 0) {
 452           while (path.length != 0 && path.ptr[0] == '/') path = path[1..$];
 453           if (path.length == 0) break;
 454           usize e = 0;
 455           while (e < path.length && path.ptr[e] != '/') ++e;
 456           //if (e == 1 && path.ptr[0] == '+') throw new Exception("invalid callback path");
 457           if (path.ptr[e-1] == '+' || path.ptr[e-1] == '*') {
 458             pth ~= PathElement(path[0..e-1].idup, path.ptr[e-1]);
 459             hasQuants = true;
 460           } else {
 461             pth ~= PathElement(path[0..e].idup, 0);
 462           }
 463           path = path[e..$];
 464         }
 465         if (pth.length == 0) throw new Exception("invalid callback path");
 466       } else {
 467         hasQuants = true;
 468         pth ~= PathElement(null, '*');
 469       }
 470       TagCB* res;
 471       static if (type == "open") {
 472         callbacksOpen.length += 1;
 473         res = &callbacksOpen[$-1];
 474         res.type = TagCB.Type.Open;
 475       } else static if (type == "close") {
 476         callbacksClose.length += 1;
 477         res = &callbacksClose[$-1];
 478         res.type = TagCB.Type.Close;
 479       } else static if (type == "content") {
 480         callbacksContent.length += 1;
 481         res = &callbacksContent[$-1];
 482         res.type = TagCB.Type.Content;
 483       } else {
 484         static assert(0, "wtf?!");
 485       }
 486       res.path = pth;
 487       res.pathHasQuants = hasQuants;
 488       return res;
 489     }
 490   }
 491
 492   // yes, i can make it faster with some more preprocessing, but why should i bother?
 493   static bool pathHit (const(char)[][] tagStack, PathElement[] path, bool hasQuants) {
 494     version(none) {
 495       import std.stdio;
 496       writeln("tagStack: ", tagStack[]);
 497       foreach (const ref PathElement pe; path) {
 498         write((pe.quant ? pe.quant : ' '), pe.name);
 499       }
 500       writeln;
 501     }
 502     if (!hasQuants) {
 503       // easy case
 504       if (tagStack.length != path.length) return false;
 505       foreach_reverse (immutable idx, const ref PathElement pe; path) {
 506         if (tagStack.ptr[idx] != pe.name) return false;
 507       }
 508       return true;
 509     }
 510
 511     static bool hasQ (PathElement[] path) {
 512       foreach (const ref PathElement pe; path) if (pe.quant) return true;
 513       return false;
 514     }
 515
 516     while (path.length > 0) {
 517       auto pe = &path[0];
 518       path = path[1..$];
 519       if (pe.quant == '*') {
 520         if (pe.name.length == 0) {
 521           // any number of any tag, including zero
 522           if (path.length == 0) return true;
 523           while (tagStack.length > 0) {
 524             if (pathHit(tagStack, path, hasQ(path))) return true;
 525             tagStack = tagStack[1..$];
 526           }
 527           return false;
 528         } else {
 529           // any number of given tag, including zero
 530           // skip this tag and continue
 531           while (tagStack.length && tagStack.ptr[0] == pe.name) tagStack = tagStack[1..$];
 532         }
 533       } else if (pe.quant == '+') {
 534         if (pe.name.length == 0) {
 535           // any number of any tag, not including zero
 536           if (path.length == 0) return (tagStack.length > 0);
 537           while (tagStack.length > 0) {
 538             if (pathHit(tagStack, path, hasQ(path))) return true;
 539             tagStack = tagStack[1..$];
 540           }
 541           return false;
 542         } else {
 543           // any number of given tag, not including zero
 544           if (tagStack.length == 0 || tagStack.ptr[0] != pe.name) return false;
 545           // skip this tag and continue
 546           while (tagStack.length && tagStack.ptr[0] == pe.name) tagStack = tagStack[1..$];
 547         }
 548       } else if (pe.name.length != 0) {
 549         // named tag
 550         if (tagStack.length == 0) return false;
 551         if (pe.name != tagStack.ptr[0]) return false;
 552         tagStack = tagStack[1..$];
 553       } else {
 554         // any tag
 555         tagStack = tagStack[1..$];
 556       }
 557     }
 558     return (tagStack.length == 0);
 559   }
 560
 561 private:
 562   void loadFile(ST) (auto ref ST fl) if (isReadableStream!ST || (isInputRange!ST && is(ElementEncodingType!ST == char))) {
 563     bool seenXML;
 564     bool tagStackLastWasAppend = true;
 565     const(char)[][] tagStack; // all data is in tagStackBuf
 566     char[] tagStackBuf;
 567     scope(exit) tagStackBuf.destroy;
 568     uint tagStackBufPos;
 569     EncodingScheme efrom, eto;
 570     scope(exit) { efrom.destroy; eto.destroy; }
 571     char[] recbuf; // recode buffer
 572     usize rcpos; // for recode buffer
 573     scope(exit) recbuf.destroy;
 574
 575     void pushTag (const(char)[] s) {
 576       if (s.length) {
 577         if (tagStackBufPos+s.length >= tagStackBuf.length) {
 578           if (tagStackBufPos >= int.max/2) throw new Exception("too many tags");
 579           tagStackBuf.length = ((tagStackBufPos+s.length)|0x3ff)+1;
 580         }
 581         tagStackBuf[tagStackBufPos..tagStackBufPos+s.length] = s[];
 582         if (!tagStackLastWasAppend) { tagStack.assumeSafeAppend; tagStackLastWasAppend = true; }
 583         tagStack ~= tagStackBuf[tagStackBufPos..tagStackBufPos+s.length];
 584         tagStackBufPos += s.length;
 585       } else {
 586         if (!tagStackLastWasAppend) { tagStack.assumeSafeAppend; tagStackLastWasAppend = true; }
 587         tagStack ~= "";
 588       }
 589     }
 590
 591     void popTag () {
 592       tagStack.length -= 1;
 593       auto idx = tagStack.length;
 594       tagStackBufPos -= tagStack.ptr[idx].length;
 595       tagStackLastWasAppend = false;
 596     }
 597
 598     char[] nrecode(bool doreset=true) (char[] text) {
 599       if (efrom is null) return text; // nothing to do
 600       static if (doreset) rcpos = 0;
 601       bool needRecode = false;
 602       foreach (char ch; text) if (ch >= 0x80) { needRecode = true; break; }
 603       if (!needRecode) return text;
 604       auto stpos = rcpos;
 605       ubyte[16] buf;
 606       auto ub = cast(const(ubyte)[])text;
 607       while (ub.length > 0) {
 608         dchar dc = efrom.safeDecode(ub);
 609         if (dc == INVALID_SEQUENCE) dc = '?';
 610         auto len = eto.encode(dc, buf);
 611         if (rcpos+len > recbuf.length) {
 612           recbuf.assumeSafeAppend; // the user is expected to copy data
 613           recbuf.length = ((rcpos+len)|0x3ff)+1;
 614         }
 615         recbuf[rcpos..rcpos+len] = cast(char[])buf[0..len];
 616         rcpos += len;
 617       }
 618       return recbuf[stpos..rcpos];
 619     }
 620
 621     xmparse(fl,
 622       (char[] name, char[][string] attrs) {
 623         if (name == "?xml") {
 624           if (seenXML) throw new Exception("duplicate '?xml?' tag");
 625           seenXML = true;
 626           if (auto ec = "encoding" in attrs) {
 627             foreach (ref char ch; *ec) {
 628               import std.ascii : toLower;
 629               ch = ch.toLower;
 630             }
 631             if ((*ec).length && *ec != "utf-8") {
 632               efrom = EncodingScheme.create(cast(string)(*ec)); // let's hope that it is safe...
 633               eto = EncodingScheme.create("utf-8");
 634             }
 635           }
 636           return;
 637         }
 638         if (!seenXML) throw new Exception("no '?xml?' tag");
 639         pushTag(name);
 640         bool attrsRecoded = (efrom is null);
 641         foreach (ref TagCB tcb; callbacksOpen) {
 642           if (tcb.type == TagCB.Type.Open && pathHit(tagStack, tcb.path, tcb.pathHasQuants)) {
 643             if (tcb.openNoAttr) {
 644               tcb.close(name);
 645             } else {
 646               // recode attrs and call the callback
 647               if (!attrsRecoded) {
 648                 rcpos = 0; // reset recode
 649                 foreach (ref v; attrs.byValue) v = nrecode!false(v);
 650                 attrsRecoded = true;
 651               }
 652               tcb.open(name, attrs);
 653             }
 654           }
 655         }
 656       },
 657       (char[] name) {
 658         if (name == "?xml") return;
 659         if (tagStack.length == 0 || tagStack[$-1] != name) throw new Exception("unbalanced xml tags");
 660         foreach (ref TagCB tcb; callbacksClose) {
 661           if (tcb.type == TagCB.Type.Close && pathHit(tagStack, tcb.path, tcb.pathHasQuants)) {
 662             // call the callback
 663             tcb.close(name);
 664           }
 665         }
 666         popTag();
 667       },
 668       (char[] text) {
 669         bool textRecoded = (efrom is null);
 670         foreach (ref TagCB tcb; callbacksContent) {
 671           if (tcb.type == TagCB.Type.Content && pathHit(tagStack, tcb.path, tcb.pathHasQuants)) {
 672             // recode text and call the callback
 673             if (!textRecoded) {
 674               text = nrecode(text);
 675               textRecoded = true;
 676             }
 677             tcb.content(text);
 678           }
 679         }
 680       },
 681     );
 682   }
 683 }