saxy.d

   1 /* Invisible Vector Library
   2  * coded by Ketmar // Invisible Vector <ketmar@ketmar.no-ip.org>
   3  * Understanding is not required. Only obedience.
   4  *
   5  * This software is provided 'as-is', without any express or implied
   6  * warranty.  In no event will the authors be held liable for any damages
   7  * arising from the use of this software.
   8  * Permission is granted to anyone to use this software for any purpose,
   9  * including commercial applications, and to alter it and redistribute it
  10  * freely, subject to the following restrictions:
  11  * 1. The origin of this software must not be misrepresented; you must not
  12  *    claim that you wrote the original software. If you use this software
  13  *    in a product, an acknowledgment in the product documentation would be
  14  *    appreciated but is not required.
  15  * 2. Altered source versions must be plainly marked as such, and must not be
  16  *    misrepresented as being the original software.
  17  * 3. This notice may not be removed or altered from any source distribution.
  18  *
  19  * This program is distributed in the hope that it will be useful,
  20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  22  */
  23 // SAX style xml parser
  24 module iv.saxy /*is aliced*/;
  25
  26 import std.encoding;
  27 import std.range;
  28
  29 import iv.alice;
  30 import iv.strex;
  31 import iv.vfs;
  32
  33
  34 // ////////////////////////////////////////////////////////////////////////// //
  35 //*WARNING*: attr keys are *NOT* strings!
  36 void xmparse(ST) (auto ref ST fl,
  37   scope void delegate (char[] name, char[][string] attrs) tagStart,
  38   scope void delegate (char[] name) tagEnd,
  39   scope void delegate (char[] text) content,
  40 ) if (isReadableStream!ST || (isInputRange!ST && is(ElementEncodingType!ST == char))) {
  41   char[] buf;
  42   uint bufpos;
  43   char[][string] attrs;
  44   scope(exit) {
  45     attrs.destroy;
  46     buf.destroy;
  47   }
  48
  49   static bool isValidNameChar() (char ch) {
  50     pragma(inline, true);
  51     return
  52       (ch >= '0' && ch <= '9') ||
  53       (ch >= 'A' && ch <= 'Z') ||
  54       (ch >= 'a' && ch <= 'z') ||
  55       ch == '_' || ch == '-' || ch == ':';
  56   }
  57
  58   int tagLevel = 0;
  59
  60   void bufPut (const(char)[] chars...) {
  61     if (/*tagLevel &&*/ chars.length) {
  62       if (chars.length+bufpos > buf.length) {
  63         if (chars.length+bufpos >= int.max) throw new Exception("out of memory in xml parser");
  64         buf.assumeSafeAppend;
  65         buf.length = ((chars.length+bufpos)|0x3ff)+1;
  66       }
  67       buf[bufpos..bufpos+chars.length] = chars[];
  68       bufpos += chars.length;
  69     }
  70   }
  71
  72   void clearBuf () {
  73     bufpos = 0;
  74   }
  75
  76   char curCh;
  77   bool eof;
  78
  79   static if (isReadableStream!ST) {
  80     char[] rdbuf;
  81     scope(exit) rdbuf.destroy;
  82     uint rdbufpos, rdbufused;
  83   }
  84
  85   void skipChar () {
  86     if (!eof) {
  87       static if (isReadableStream!ST) {
  88         // buffer more bytes
  89         if (rdbufpos >= rdbufused) {
  90           if (rdbuf.length == 0) rdbuf.length = 32*1024;
  91           auto rd = fl.rawRead(rdbuf[]);
  92           if (rd.length == 0) { eof = true; curCh = 0; return; }
  93           rdbufpos = 0;
  94           rdbufused = cast(uint)rd.length;
  95         }
  96         curCh = rdbuf.ptr[rdbufpos++];
  97       } else {
  98         if (fl.empty) { eof = true; curCh = 0; return; }
  99         curCh = fl.front;
 100         fl.popFront;
 101       }
 102       if (curCh == 0) curCh = ' ';
 103     }
 104   }
 105
 106   // curCh is '&'
 107   void parseEntity (bool inattr) {
 108     assert(curCh == '&');
 109     bufPut(curCh);
 110     auto xpos = bufpos;
 111     skipChar();
 112     if (inattr) {
 113       while (!eof && curCh != '/' && curCh != '>' && curCh != '?' && curCh != ';' && bufpos-xpos < 9) {
 114         bufPut(curCh);
 115         skipChar();
 116       }
 117     } else {
 118       while (!eof && curCh != '<' && curCh != ';' && bufpos-xpos < 9) {
 119         bufPut(curCh);
 120         skipChar();
 121       }
 122     }
 123     if (!eof && curCh == ';' && bufpos > xpos) {
 124       import std.utf : encode, UseReplacementDchar;
 125       char[4] ubuf = void; // utf buffer
 126       switch (buf[xpos..bufpos]) {
 127         case "lt": bufpos = xpos-1; bufPut('<'); break;
 128         case "gt": bufpos = xpos-1; bufPut('>'); break;
 129         case "amp": bufpos = xpos-1; bufPut('&'); break;
 130         case "quot": bufpos = xpos-1; bufPut('"'); break;
 131         case "apos": bufpos = xpos-1; bufPut('\''); break;
 132         default:
 133           bufPut(curCh); // first put ';'
 134           if (bufpos-xpos > 3 && buf.ptr[xpos] == '#' && buf.ptr[xpos+1] == 'x') {
 135             // should be hex code
 136             uint n = 0;
 137             auto pos = xpos+2;
 138             while (pos < bufpos-1) {
 139               char ch = buf.ptr[pos++];
 140                    if (ch >= '0' && ch <= '9') n = n*16+ch-'0';
 141               else if (ch >= 'A' && ch <= 'F') n = n*16+ch-'A'+10;
 142               else if (ch >= 'a' && ch <= 'f') n = n*16+ch-'a'+10;
 143               else { n = uint.max; break; } // invalid digit
 144               if (n > dchar.max) break; // invalid char
 145             }
 146             if (n <= dchar.max) {
 147               bufpos = xpos-1;
 148               auto sz = encode!(UseReplacementDchar.yes)(ubuf, cast(dchar)n);
 149               foreach (immutable char ch; ubuf[0..sz]) bufPut(ch);
 150             }
 151           } else if (bufpos-xpos > 2 && buf.ptr[xpos] == '#') {
 152             // shoud be decimal code
 153             uint n = 0;
 154             auto pos = xpos+1;
 155             while (pos < bufpos-1) {
 156               char ch = buf.ptr[pos++];
 157               if (ch >= '0' && ch <= '9') n = n*10+ch-'0';
 158               else { n = uint.max; break; } // invalid digit
 159               if (n > dchar.max) break; // invalid char
 160             }
 161             if (n <= dchar.max) {
 162               bufpos = xpos-1;
 163               auto sz = encode!(UseReplacementDchar.yes)(ubuf, cast(dchar)n);
 164               foreach (immutable char ch; ubuf[0..sz]) bufPut(ch);
 165             }
 166           }
 167           break;
 168       }
 169       skipChar();
 170     }
 171   }
 172
 173   void parseCData () {
 174     clearBuf();
 175     while (!eof) {
 176       if (bufpos >= 3 && buf.ptr[bufpos-1] == '>' && buf.ptr[bufpos-2] == ']' && buf.ptr[bufpos-3] == ']') {
 177         bufpos -= 3;
 178         break;
 179       }
 180       bufPut(curCh);
 181       skipChar();
 182     }
 183     if (tagLevel && bufpos > 0 && content !is null) content(buf[0..bufpos]);
 184     clearBuf();
 185   }
 186
 187   void parseContent () {
 188     clearBuf();
 189     while (!eof) {
 190       if (curCh == '<') break;
 191       if (curCh != '&') {
 192         bufPut(curCh);
 193         skipChar();
 194       } else {
 195         parseEntity(false);
 196       }
 197     }
 198     if (tagLevel && bufpos > 0 && content !is null) content(buf[0..bufpos]);
 199     clearBuf();
 200   }
 201
 202   void parseTag () {
 203     assert(!eof && curCh == '<');
 204     clearBuf();
 205     skipChar();
 206     if (eof) throw new Exception("invalid xml");
 207     bool inlineClose = false, closeTag = false;
 208     if (curCh == '!') {
 209       // either CDATA, or comment-like
 210       skipChar();
 211       if (curCh == '[') {
 212         // this *must* be CDATA
 213         skipChar();
 214         if (curCh != 'C') throw new Exception("invalid xml");
 215         skipChar();
 216         if (curCh != 'D') throw new Exception("invalid xml");
 217         skipChar();
 218         if (curCh != 'A') throw new Exception("invalid xml");
 219         skipChar();
 220         if (curCh != 'T') throw new Exception("invalid xml");
 221         skipChar();
 222         if (curCh != 'A') throw new Exception("invalid xml");
 223         skipChar();
 224         if (curCh != '[') throw new Exception("invalid xml");
 225         skipChar();
 226         clearBuf();
 227         parseCData();
 228         return;
 229       } else if (curCh == '-') {
 230         // comment
 231         skipChar();
 232         if (curCh != '-') throw new Exception("invalid xml");
 233         skipChar();
 234         for (;;) {
 235           if (eof) throw new Exception("invalid xml");
 236           if (curCh == '-') {
 237             skipChar();
 238             if (curCh == '-') {
 239               skipChar();
 240               if (curCh == '>') {
 241                 skipChar();
 242                 break;
 243               }
 244             }
 245           } else {
 246             skipChar();
 247           }
 248         }
 249         clearBuf();
 250         return;
 251       } else {
 252         // !tag
 253         bufPut('!');
 254       }
 255     } else {
 256       if (curCh == '/') { closeTag = true; skipChar(); }
 257       if (curCh == '?') { bufPut(curCh); skipChar(); }
 258     }
 259     if (eof || !isValidNameChar(curCh)) throw new Exception("invalid xml");
 260     while (isValidNameChar(curCh)) {
 261       bufPut(curCh);
 262       skipChar();
 263     }
 264     //{ import std.stdio; writeln("TAG: ", buf[0..bufpos].quote); }
 265     // now parse attributes
 266     scope(exit) attrs.clear();
 267     while (!eof && curCh <= ' ') skipChar();
 268     // closing tag?
 269     auto tagnameend = bufpos;
 270     if (!closeTag) {
 271       // attr=["]name["]
 272       // read the whole tag, so we can add AA items without anchoring stale memory
 273       if (eof) throw new Exception("invalid xml");
 274       if (curCh != '/' && curCh != '>' && curCh != '?') {
 275         bufPut(' ');
 276         auto stpos = bufpos;
 277         char qch = 0;
 278         for (;;) {
 279           if (eof) throw new Exception("invalid xml");
 280           if (qch) {
 281             if (curCh == qch) qch = 0;
 282             if (curCh == '&') {
 283               parseEntity(true);
 284               continue;
 285             }
 286           } else {
 287             if (curCh == '/' || curCh == '>' || curCh == '?') break;
 288             if (curCh == '"' || curCh == '\'') qch = curCh;
 289           }
 290           bufPut(curCh);
 291           skipChar();
 292         }
 293         // now parse attributes
 294         while (stpos < bufpos) {
 295           while (stpos < bufpos && buf.ptr[stpos] <= ' ') ++stpos;
 296           if (stpos >= bufpos) break;
 297           //{ import std.stdio; writeln(": ", buf[stpos..bufpos].quote); }
 298           if (!isValidNameChar(buf.ptr[stpos])) throw new Exception("invalid xml");
 299           auto nst = stpos;
 300           while (stpos < bufpos && isValidNameChar(buf.ptr[stpos])) ++stpos;
 301           string aname = cast(string)(buf[nst..stpos]); // unsafe cast, but meh...
 302           while (stpos < bufpos && buf.ptr[stpos] <= ' ') ++stpos;
 303           if (stpos >= bufpos) { attrs[aname] = null; break; } // no value
 304           if (buf.ptr[stpos] != '=') { attrs[aname] = null; continue; } // no value
 305           ++stpos;
 306           if (stpos >= bufpos) { attrs[aname] = buf[bufpos..bufpos]; break; }
 307           if (buf.ptr[stpos] == '"' || buf.ptr[stpos] == '\'') {
 308             auto ech = buf.ptr[stpos];
 309             nst = ++stpos;
 310             while (stpos < bufpos && buf.ptr[stpos] != ech) ++stpos;
 311             if (stpos >= bufpos) throw new Exception("invalid xml");
 312             attrs[aname] = buf[nst..stpos];
 313             ++stpos;
 314           } else {
 315             nst = stpos;
 316             while (stpos < bufpos && buf.ptr[stpos] > ' ') ++stpos;
 317             attrs[aname] = buf[nst..stpos];
 318           }
 319         }
 320       }
 321     }
 322     if (curCh == '?') {
 323       if (buf.ptr[0] != '?') throw new Exception("invalid xml");
 324       skipChar();
 325       inlineClose = true;
 326     } else if (buf.ptr[0] != '!') {
 327       if (curCh == '/') { inlineClose = true; skipChar(); }
 328     } else {
 329       inlineClose = true;
 330     }
 331     if (curCh != '>') throw new Exception("invalid xml");
 332     skipChar();
 333     if (closeTag) {
 334       if (inlineClose) throw new Exception("invalid xml");
 335       if (tagEnd !is null) tagEnd(buf[0..tagnameend]);
 336       --tagLevel;
 337     } else {
 338       ++tagLevel;
 339       if (tagStart !is null) tagStart(buf[0..tagnameend], attrs);
 340       if (inlineClose) {
 341         if (tagEnd !is null) tagEnd(buf[0..tagnameend]);
 342         --tagLevel;
 343       }
 344     }
 345   }
 346
 347   while (!eof) {
 348     //writeln("*** ", tagLevel, " ***");
 349     parseContent();
 350     if (eof) break;
 351     if (curCh == '<') {
 352       parseTag();
 353       if (tagLevel < 0) throw new Exception("invalid xml");
 354     }
 355   }
 356
 357   if (tagLevel != 0) throw new Exception("invalid xml");
 358 }
 359
 360
 361 // ////////////////////////////////////////////////////////////////////////// //
  362 // you can use "quantifiers" in paths, like this:
 363 //   "/a/b/c*/d+/*"
 364 // that means "any number of 'c' tags", "one or more 'd' tags", "any number of any tags"
 365 // the last is useful to parse things like "bold" tag inside "p" tag, for example
 366 final class SaxyEx {
 367 private import std.range;
 368 public:
 369   alias TagOpenCB = void delegate (char[] name, char[][string] attrs);
 370   alias TagOpenCBNA = void delegate (char[] name);
 371   alias TagCloseCB = void delegate (char[] name);
 372   alias TagContentCB = void delegate (char[] text);
 373
 374 private:
 375   static struct PathElement {
 376     string name; // empty: any tag
 377     char quant = 0; // '+', '*', 0
 378   }
 379
 380   static struct TagCB {
 381     enum Type { Open, Close, Content }
 382     Type type;
 383     PathElement[] path;
 384     bool pathHasQuants; // use faster algo if there are no quantifiers
 385     bool openNoAttr;
 386     union {
 387       TagOpenCB open;
 388       TagCloseCB close;
 389       TagContentCB content;
 390     }
 391   }
 392
 393 private:
 394   TagCB[] callbacksOpen;
 395   TagCB[] callbacksClose;
 396   TagCB[] callbacksContent;
 397
 398 public:
 399   this () {}
 400
 401   void load (const(char)[] filename) { loadFile(VFile(filename)); }
 402
 403   void loadStream(ST) (auto ref ST st) if (isReadableStream!ST || (isInputRange!ST && is(ElementEncodingType!ST == char))) { loadFile(st); }
 404
 405   void onOpen(ST : const(char)[]) (ST path, TagOpenCB cb) {
 406     assert(cb !is null);
 407     auto tcb = newCallback!"open"(path);
 408     tcb.open = cb;
 409     tcb.openNoAttr = false;
 410   }
 411
 412   void onOpen(ST : const(char)[]) (ST path, TagOpenCBNA cb) {
 413     assert(cb !is null);
 414     auto tcb = newCallback!"open"(path);
 415     tcb.close = cb; // lucky me
 416     tcb.openNoAttr = true;
 417   }
 418
 419   void onClose(ST : const(char)[]) (ST path, TagCloseCB cb) {
 420     assert(cb !is null);
 421     auto tcb = newCallback!"close"(path);
 422     tcb.close = cb;
 423   }
 424
 425   void onContent(ST : const(char)[]) (ST path, TagContentCB cb) {
 426     assert(cb !is null);
 427     auto tcb = newCallback!"content"(path);
 428     tcb.content = cb;
 429   }
 430
 431 private:
 432   TagCB* newCallback(string type, ST : const(char)[]) (ST path) {
 433     static if (is(ST == typeof(null))) {
 434       return newCallback("");
 435     } else {
 436       // parse path
 437       bool hasQuants = false;
 438       PathElement[] pth;
 439       if (path.length) {
 440         while (path.length != 0) {
 441           while (path.length != 0 && path.ptr[0] == '/') path = path[1..$];
 442           if (path.length == 0) break;
 443           usize e = 0;
 444           while (e < path.length && path.ptr[e] != '/') ++e;
 445           //if (e == 1 && path.ptr[0] == '+') throw new Exception("invalid callback path");
 446           if (path.ptr[e-1] == '+' || path.ptr[e-1] == '*') {
 447             pth ~= PathElement(path[0..e-1].idup, path.ptr[e-1]);
 448             hasQuants = true;
 449           } else {
 450             pth ~= PathElement(path[0..e].idup, 0);
 451           }
 452           path = path[e..$];
 453         }
 454         if (pth.length == 0) throw new Exception("invalid callback path");
 455       } else {
 456         hasQuants = true;
 457         pth ~= PathElement(null, '*');
 458       }
 459       TagCB* res;
 460       static if (type == "open") {
 461         callbacksOpen.length += 1;
 462         res = &callbacksOpen[$-1];
 463         res.type = TagCB.Type.Open;
 464       } else static if (type == "close") {
 465         callbacksClose.length += 1;
 466         res = &callbacksClose[$-1];
 467         res.type = TagCB.Type.Close;
 468       } else static if (type == "content") {
 469         callbacksContent.length += 1;
 470         res = &callbacksContent[$-1];
 471         res.type = TagCB.Type.Content;
 472       } else {
 473         static assert(0, "wtf?!");
 474       }
 475       res.path = pth;
 476       res.pathHasQuants = hasQuants;
 477       return res;
 478     }
 479   }
 480
 481   // yes, i can make it faster with some more preprocessing, but why should i bother?
 482   static bool pathHit (const(char)[][] tagStack, PathElement[] path, bool hasQuants) {
 483     version(none) {
 484       import std.stdio;
 485       writeln("tagStack: ", tagStack[]);
 486       foreach (const ref PathElement pe; path) {
 487         write((pe.quant ? pe.quant : ' '), pe.name);
 488       }
 489       writeln;
 490     }
 491     if (!hasQuants) {
 492       // easy case
 493       if (tagStack.length != path.length) return false;
 494       foreach_reverse (immutable idx, const ref PathElement pe; path) {
 495         if (tagStack.ptr[idx] != pe.name) return false;
 496       }
 497       return true;
 498     }
 499
 500     static bool hasQ (PathElement[] path) {
 501       foreach (const ref PathElement pe; path) if (pe.quant) return true;
 502       return false;
 503     }
 504
 505     while (path.length > 0) {
 506       auto pe = &path[0];
 507       path = path[1..$];
 508       if (pe.quant == '*') {
 509         if (pe.name.length == 0) {
 510           // any number of any tag, including zero
 511           if (path.length == 0) return true;
 512           while (tagStack.length > 0) {
 513             if (pathHit(tagStack, path, hasQ(path))) return true;
 514             tagStack = tagStack[1..$];
 515           }
 516           return false;
 517         } else {
 518           // any number of given tag, including zero
 519           // skip this tag and continue
 520           while (tagStack.length && tagStack.ptr[0] == pe.name) tagStack = tagStack[1..$];
 521         }
 522       } else if (pe.quant == '+') {
 523         if (pe.name.length == 0) {
 524           // any number of any tag, not including zero
 525           if (path.length == 0) return (tagStack.length > 0);
 526           while (tagStack.length > 0) {
 527             if (pathHit(tagStack, path, hasQ(path))) return true;
 528             tagStack = tagStack[1..$];
 529           }
 530           return false;
 531         } else {
 532           // any number of given tag, not including zero
 533           if (tagStack.length == 0 || tagStack.ptr[0] != pe.name) return false;
 534           // skip this tag and continue
 535           while (tagStack.length && tagStack.ptr[0] == pe.name) tagStack = tagStack[1..$];
 536         }
 537       } else if (pe.name.length != 0) {
 538         // named tag
 539         if (tagStack.length == 0) return false;
 540         if (pe.name != tagStack.ptr[0]) return false;
 541         tagStack = tagStack[1..$];
 542       } else {
 543         // any tag
 544         tagStack = tagStack[1..$];
 545       }
 546     }
 547     return (tagStack.length == 0);
 548   }
 549
 550 private:
 551   void loadFile(ST) (auto ref ST fl) if (isReadableStream!ST || (isInputRange!ST && is(ElementEncodingType!ST == char))) {
 552     bool seenXML;
 553     bool tagStackLastWasAppend = true;
 554     const(char)[][] tagStack; // all data is in tagStackBuf
 555     char[] tagStackBuf;
 556     scope(exit) tagStackBuf.destroy;
 557     uint tagStackBufPos;
 558     EncodingScheme efrom, eto;
 559     scope(exit) { efrom.destroy; eto.destroy; }
 560     char[] recbuf; // recode buffer
 561     usize rcpos; // for recode buffer
 562     scope(exit) recbuf.destroy;
 563
 564     void pushTag (const(char)[] s) {
 565       if (s.length) {
 566         if (tagStackBufPos+s.length >= tagStackBuf.length) {
 567           if (tagStackBufPos >= int.max/2) throw new Exception("too many tags");
 568           tagStackBuf.length = ((tagStackBufPos+s.length)|0x3ff)+1;
 569         }
 570         tagStackBuf[tagStackBufPos..tagStackBufPos+s.length] = s[];
 571         if (!tagStackLastWasAppend) { tagStack.assumeSafeAppend; tagStackLastWasAppend = true; }
 572         tagStack ~= tagStackBuf[tagStackBufPos..tagStackBufPos+s.length];
 573         tagStackBufPos += s.length;
 574       } else {
 575         if (!tagStackLastWasAppend) { tagStack.assumeSafeAppend; tagStackLastWasAppend = true; }
 576         tagStack ~= "";
 577       }
 578     }
 579
 580     void popTag () {
 581       tagStack.length -= 1;
 582       auto idx = tagStack.length;
 583       tagStackBufPos -= tagStack.ptr[idx].length;
 584       tagStackLastWasAppend = false;
 585     }
 586
 587     char[] nrecode(bool doreset=true) (char[] text) {
 588       if (efrom is null) return text; // nothing to do
 589       static if (doreset) rcpos = 0;
 590       bool needRecode = false;
 591       foreach (char ch; text) if (ch >= 0x80) { needRecode = true; break; }
 592       if (!needRecode) return text;
 593       auto stpos = rcpos;
 594       ubyte[16] buf;
 595       auto ub = cast(const(ubyte)[])text;
 596       while (ub.length > 0) {
 597         dchar dc = efrom.safeDecode(ub);
 598         if (dc == INVALID_SEQUENCE) dc = '?';
 599         auto len = eto.encode(dc, buf);
 600         if (rcpos+len > recbuf.length) {
 601           recbuf.assumeSafeAppend; // the user is expected to copy data
 602           recbuf.length = ((rcpos+len)|0x3ff)+1;
 603         }
 604         recbuf[rcpos..rcpos+len] = cast(char[])buf[0..len];
 605         rcpos += len;
 606       }
 607       return recbuf[stpos..rcpos];
 608     }
 609
 610     xmparse(fl,
 611       (char[] name, char[][string] attrs) {
 612         if (name == "?xml") {
 613           if (seenXML) throw new Exception("duplicate '?xml?' tag");
 614           seenXML = true;
 615           if (auto ec = "encoding" in attrs) {
 616             foreach (ref char ch; *ec) {
 617               import std.ascii : toLower;
 618               ch = ch.toLower;
 619             }
 620             if ((*ec).length && *ec != "utf-8") {
 621               efrom = EncodingScheme.create(cast(string)(*ec)); // let's hope that it is safe...
 622               eto = EncodingScheme.create("utf-8");
 623             }
 624           }
 625           return;
 626         }
 627         if (!seenXML) throw new Exception("no '?xml?' tag");
 628         pushTag(name);
 629         bool attrsRecoded = (efrom is null);
 630         foreach (ref TagCB tcb; callbacksOpen) {
 631           if (tcb.type == TagCB.Type.Open && pathHit(tagStack, tcb.path, tcb.pathHasQuants)) {
 632             if (tcb.openNoAttr) {
 633               tcb.close(name);
 634             } else {
 635               // recode attrs and call the callback
 636               if (!attrsRecoded) {
 637                 rcpos = 0; // reset recode
 638                 foreach (ref v; attrs.byValue) v = nrecode!false(v);
 639                 attrsRecoded = true;
 640               }
 641               tcb.open(name, attrs);
 642             }
 643           }
 644         }
 645       },
 646       (char[] name) {
 647         if (name == "?xml") return;
 648         if (tagStack.length == 0 || tagStack[$-1] != name) throw new Exception("unbalanced xml tags");
 649         foreach (ref TagCB tcb; callbacksClose) {
 650           if (tcb.type == TagCB.Type.Close && pathHit(tagStack, tcb.path, tcb.pathHasQuants)) {
 651             // call the callback
 652             tcb.close(name);
 653           }
 654         }
 655         popTag();
 656       },
 657       (char[] text) {
 658         bool textRecoded = (efrom is null);
 659         foreach (ref TagCB tcb; callbacksContent) {
 660           if (tcb.type == TagCB.Type.Content && pathHit(tagStack, tcb.path, tcb.pathHasQuants)) {
 661             // recode text and call the callback
 662             if (!textRecoded) {
 663               text = nrecode(text);
 664               textRecoded = true;
 665             }
 666             tcb.content(text);
 667           }
 668         }
 669       },
 670     );
 671   }
 672 }