Adding extra charsets for ActionMailer unit tests, if you're looking to parse incomin...
[akelos.git] / lib / AkXhtmlValidator.php
blob892e5ae58381c92cd49814c2f31e3e2db13c4f3f
1 <?php
2 /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
3 // +----------------------------------------------------------------------+
4 // | Akelos Framework - http://www.akelos.org |
5 // +----------------------------------------------------------------------+
6 // | Copyright (c) 2002-2006, Akelos Media, S.L. & Bermi Ferrer Martinez |
7 // | Released under the GNU Lesser General Public License, see LICENSE.txt|
8 // +----------------------------------------------------------------------+
10 /**
11 * @package ActionView
12 * @subpackage Utils
13 * @author Bermi Ferrer <bermi a.t akelos c.om>
14 * @copyright Copyright (c) 2002-2006, Akelos Media, S.L. http://www.akelos.org
15 * @license GNU Lesser General Public License <http://www.gnu.org/copyleft/lesser.html>
18 class AkXhtmlValidator
/**
 * Attribute groups that apply to many tags at once.
 *
 * Every group has an 'attributes' list and may name the tags it does not
 * apply to under 'except'. String-keyed attribute entries carry either the
 * accepted values or a validation pattern.
 *
 * NOTE(review): $_events also defines a 'keyboard' group; when both arrays
 * are combined with array_merge() only one 'keyboard' entry can survive.
 * Confirm that this is intended.
 */
var $_attributes = array(
    'core' => array(
        'except' => array('base', 'head', 'html', 'meta', 'param', 'script', 'style', 'title'),
        'attributes' => array('class', 'id', 'style', 'title', 'accesskey', 'tabindex'),
    ),
    'language' => array(
        'except' => array('base', 'br', 'hr', 'iframe', 'param', 'script'),
        'attributes' => array(
            'dir' => array('ltr', 'rtl'),
            'lang',
            'xml:lang',
        ),
    ),
    'keyboard' => array(
        'attributes' => array(
            'accesskey' => '/^(\w){1}$/',
            'tabindex' => '/^(\d)+$/',
        ),
    ),
);
/**
 * Event-handler attribute groups.
 *
 * A group is limited to the tags listed under 'only', or applies to every
 * tag except those listed under 'except'.
 */
var $_events = array(
    'window' => array(
        'only' => array('body'),
        'attributes' => array('onload', 'onunload'),
    ),
    'form' => array(
        'only' => array('form', 'input', 'textarea', 'select', 'a', 'label', 'button'),
        'attributes' => array('onchange', 'onsubmit', 'onreset', 'onselect', 'onblur', 'onfocus'),
    ),
    'keyboard' => array(
        'except' => array(
            'base', 'bdo', 'br', 'frame', 'frameset', 'head', 'html',
            'iframe', 'meta', 'param', 'script', 'style', 'title',
        ),
        'attributes' => array('onkeydown', 'onkeypress', 'onkeyup'),
    ),
    'mouse' => array(
        'except' => array(
            'base', 'bdo', 'br', 'head', 'html', 'meta', 'param', 'script', 'style', 'title',
        ),
        'attributes' => array(
            'onclick', 'ondblclick', 'onmousedown', 'onmousemove',
            'onmouseover', 'onmouseout', 'onmouseup',
        ),
    ),
);
/**
 * Known tags and their tag-specific rules.
 *
 * A bare string entry is a tag without rules of its own. A keyed entry may
 * define:
 *  - 'attributes': allowed attribute names; a string-keyed entry maps the
 *    attribute name to the pattern its value must match,
 *  - 'required':   attributes that must be present,
 *  - 'inside':     name of a related container tag (not read by the methods
 *    of this class that are visible here).
 *
 * Fixes compared to the previous table:
 *  - 'param' listed 'valuetype' twice and never allowed 'name', although
 *    'name' is required for that tag; the duplicate is now 'name'.
 *  - 'tfoot' had 'baseline' as a stand-alone attribute instead of as a
 *    'valign' value; it now matches 'thead' and 'tbody'.
 */
var $_tags = array(
    'a' => array(
        'attributes' => array(
            'charset',
            'coords',
            'href',
            'hreflang',
            'name',
            'rel' => '/^(alternate|designates|stylesheet|start|next|prev|contents|index|glossary|copyright|chapter|section|subsection|appendix|help|bookmark| |shortcut|icon)+$/',
            'rev' => '/^(alternate|designates|stylesheet|start|next|prev|contents|index|glossary|copyright|chapter|section|subsection|appendix|help|bookmark| |shortcut|icon)+$/',
            'shape' => '/^(rect|rectangle|circ|circle|poly|polygon)$/',
            'type',
        ),
    ),
    'abbr',
    'acronym',
    'address',
    'area' => array(
        'attributes' => array(
            'alt',
            'coords',
            'href',
            'nohref' => '/^(true|false)$/',
            'shape' => '/^(rect|rectangle|circ|circle|poly|polygon)$/',
        ),
        'required' => array('alt'),
    ),
    'b',
    'base' => array(
        'attributes' => array('href'),
        'required' => array('href'),
    ),
    'bdo' => array(
        'attributes' => array(
            'dir' => '/^(ltr|rtl)$/',
        ),
        'required' => array('dir'),
    ),
    'big',
    'blockquote' => array(
        'attributes' => array('cite'),
    ),
    'body',
    'br',
    'button' => array(
        'attributes' => array(
            'disabled' => '/^(disabled)$/',
            'type' => '/^(button|reset|submit)$/',
            'value',
        ),
        'inside' => 'form',
    ),
    'caption',
    'cite',
    'code',
    'col' => array(
        'attributes' => array(
            'align' => '/^(right|left|center|justify)$/',
            'char',
            'charoff',
            'span' => '/^(\d)+$/',
            'valign' => '/^(top|middle|bottom|baseline)$/',
            'width',
        ),
        'inside' => 'colgroup',
    ),
    'colgroup' => array(
        'attributes' => array(
            'align' => '/^(right|left|center|justify)$/',
            'char',
            'charoff',
            'span' => '/^(\d)+$/',
            'valign' => '/^(top|middle|bottom|baseline)$/',
            'width',
        ),
    ),
    'dd',
    'del' => array(
        'attributes' => array(
            'cite',
            'datetime' => '/^([0-9]){8}/',
        ),
    ),
    'div',
    'dfn',
    'dl',
    'dt',
    'em',
    'fieldset' => array(
        'inside' => 'form',
    ),
    'form' => array(
        'attributes' => array(
            'action',
            'accept',
            'accept-charset',
            'enctype',
            'method' => '/^(get|post)$/',
        ),
        'required' => array('action'),
    ),
    'head' => array(
        'attributes' => array('profile'),
    ),
    'h1',
    'h2',
    'h3',
    'h4',
    'h5',
    'h6',
    'hr',
    'html' => array(
        'attributes' => array('xmlns'),
    ),
    'i',
    'img' => array(
        'attributes' => array(
            'alt',
            'src',
            'height',
            'ismap',
            'longdesc',
            'usemap',
            'width',
        ),
        'required' => array('alt', 'src'),
    ),
    'input' => array(
        'attributes' => array(
            'accept',
            'alt',
            'checked' => '/^(checked)$/',
            'disabled' => '/^(disabled)$/',
            'maxlength' => '/^(\d)+$/',
            'name',
            'readonly' => '/^(readonly)$/',
            'size' => '/^(\d)+$/',
            'src',
            'type' => '/^(button|checkbox|file|hidden|image|password|radio|reset|submit|text)$/',
            'value',
        ),
        'inside' => 'form',
    ),
    'ins' => array(
        'attributes' => array(
            'cite',
            'datetime' => '/^([0-9]){8}/',
        ),
    ),
    'kbd',
    'label' => array(
        'attributes' => array('for'),
        'inside' => 'form',
    ),
    'legend',
    'li',
    'link' => array(
        'attributes' => array(
            'charset',
            'href',
            'hreflang',
            'media' => '/^(all|braille|print|projection|screen|speech|,|;| )+$/i',
            'rel' => '/^(alternate|appendix|bookmark|chapter|contents|copyright|glossary|help|home|index|next|prev|section|start|stylesheet|subsection| |shortcut|icon)+$/i',
            'rev' => '/^(alternate|appendix|bookmark|chapter|contents|copyright|glossary|help|home|index|next|prev|section|start|stylesheet|subsection| |shortcut|icon)+$/i',
            'type',
        ),
        'inside' => 'head',
    ),
    'map' => array(
        'attributes' => array('id', 'name'),
        'required' => array('id'),
    ),
    'meta' => array(
        'attributes' => array(
            'content',
            'http-equiv' => '/^(content\-type|expires|refresh|set\-cookie)$/i',
            'name',
            'scheme',
        ),
        'required' => array('content'),
    ),
    'noscript',
    'object' => array(
        'attributes' => array(
            'archive',
            'classid',
            'codebase',
            'codetype',
            'data',
            'declare',
            'height',
            'name',
            'standby',
            'type',
            'usemap',
            'width',
        ),
    ),
    'ol',
    'optgroup' => array(
        'attributes' => array(
            'label',
            'disabled' => '/^(disabled)$/',
        ),
        'required' => array('label'),
    ),
    'option' => array(
        'attributes' => array(
            'label',
            'disabled' => '/^(disabled)$/',
            'selected' => '/^(selected)$/',
            'value',
        ),
        'inside' => 'select',
    ),
    'p',
    'param' => array(
        'attributes' => array(
            'type',
            'valuetype' => '/^(data|ref|object)$/',
            // Was a second 'valuetype'; 'name' is required below and must be allowed.
            'name',
            'value',
        ),
        'required' => array('name'),
    ),
    'pre',
    'q' => array(
        'attributes' => array('cite'),
    ),
    'samp',
    'script' => array(
        'attributes' => array(
            'type' => '/^(text\/ecmascript|text\/javascript|text\/jscript|text\/vbscript|text\/vbs|text\/xml)$/',
            'charset',
            'defer' => '/^(defer)$/',
            'src',
        ),
        'required' => array('type'),
    ),
    'select' => array(
        'attributes' => array(
            'disabled' => '/^(disabled)$/',
            'multiple' => '/^(multiple)$/',
            'name',
            'size',
        ),
        'inside' => 'form',
    ),
    'small',
    'span',
    'strong',
    'style' => array(
        'attributes' => array(
            'type',
            'media' => '/^(screen|tty|tv|projection|handheld|print|braille|aural|all)$/',
        ),
        'required' => array('type'),
    ),
    'sub',
    'sup',
    'table' => array(
        'attributes' => array(
            'border',
            'cellpadding',
            'cellspacing',
            'frame' => '/^(void|above|below|hsides|lhs|rhs|vsides|box|border)$/',
            'rules' => '/^(none|groups|rows|cols|all)$/',
            'summary',
            'width',
        ),
    ),
    'tbody' => array(
        'attributes' => array(
            'align' => '/^(right|left|center|justify)$/',
            'char',
            'charoff',
            'valign' => '/^(top|middle|bottom|baseline)$/',
        ),
    ),
    'td' => array(
        'attributes' => array(
            'abbr',
            'align' => '/^(left|right|center|justify|char)$/',
            'axis',
            'char',
            'charoff',
            'colspan' => '/^(\d)+$/',
            'headers',
            'rowspan' => '/^(\d)+$/',
            'scope' => '/^(col|colgroup|row|rowgroup)$/',
            'valign' => '/^(top|middle|bottom|baseline)$/',
        ),
    ),
    'textarea' => array(
        'attributes' => array(
            'cols',
            'rows',
            'disabled',
            'name',
            'readonly',
        ),
        'required' => array('cols', 'rows'),
        'inside' => 'form',
    ),
    'tfoot' => array(
        'attributes' => array(
            'align' => '/^(right|left|center|justify)$/',
            'char',
            'charoff',
            // 'baseline' is a valign value, not an attribute of its own.
            'valign' => '/^(top|middle|bottom|baseline)$/',
        ),
    ),
    'th' => array(
        'attributes' => array(
            'abbr',
            'align' => '/^(left|right|center|justify|char)$/',
            'axis',
            'char',
            'charoff',
            'colspan' => '/^(\d)+$/',
            'headers',
            'rowspan' => '/^(\d)+$/',
            'scope' => '/^(col|colgroup|row|rowgroup)$/',
            'valign' => '/^(top|middle|bottom|baseline)$/',
        ),
    ),
    'thead' => array(
        'attributes' => array(
            'align' => '/^(right|left|center|justify)$/',
            'char',
            'charoff',
            'valign' => '/^(top|middle|bottom|baseline)$/',
        ),
    ),
    'title',
    'tr' => array(
        'attributes' => array(
            'align' => '/^(right|left|center|justify|char)$/',
            'char',
            'charoff',
            'valign' => '/^(top|middle|bottom|baseline)$/',
        ),
    ),
    'tt',
    'ul',
    'var',
);
/**
 * Named character entities mapped to their numeric character references.
 *
 * Used to replace named entities in the markup before it is handed to the
 * XML parser.
 */
var $_entities = array(
    '&nbsp;' => '&#160;', '&iexcl;' => '&#161;', '&cent;' => '&#162;', '&pound;' => '&#163;',
    '&curren;' => '&#164;', '&yen;' => '&#165;', '&brvbar;' => '&#166;', '&sect;' => '&#167;',
    '&uml;' => '&#168;', '&copy;' => '&#169;', '&ordf;' => '&#170;', '&laquo;' => '&#171;',
    '&not;' => '&#172;', '&shy;' => '&#173;', '&reg;' => '&#174;', '&macr;' => '&#175;',
    '&deg;' => '&#176;', '&plusmn;' => '&#177;', '&sup2;' => '&#178;', '&sup3;' => '&#179;',
    '&acute;' => '&#180;', '&micro;' => '&#181;', '&para;' => '&#182;', '&middot;' => '&#183;',
    '&cedil;' => '&#184;', '&sup1;' => '&#185;', '&ordm;' => '&#186;', '&raquo;' => '&#187;',
    '&frac14;' => '&#188;', '&frac12;' => '&#189;', '&frac34;' => '&#190;', '&iquest;' => '&#191;',
    '&Agrave;' => '&#192;', '&Aacute;' => '&#193;', '&Acirc;' => '&#194;', '&Atilde;' => '&#195;',
    '&Auml;' => '&#196;', '&Aring;' => '&#197;', '&AElig;' => '&#198;', '&Ccedil;' => '&#199;',
    '&Egrave;' => '&#200;', '&Eacute;' => '&#201;', '&Ecirc;' => '&#202;', '&Euml;' => '&#203;',
    '&Igrave;' => '&#204;', '&Iacute;' => '&#205;', '&Icirc;' => '&#206;', '&Iuml;' => '&#207;',
    '&ETH;' => '&#208;', '&Ntilde;' => '&#209;', '&Ograve;' => '&#210;', '&Oacute;' => '&#211;',
    '&Ocirc;' => '&#212;', '&Otilde;' => '&#213;', '&Ouml;' => '&#214;', '&times;' => '&#215;',
    '&Oslash;' => '&#216;', '&Ugrave;' => '&#217;', '&Uacute;' => '&#218;', '&Ucirc;' => '&#219;',
    '&Uuml;' => '&#220;', '&Yacute;' => '&#221;', '&THORN;' => '&#222;', '&szlig;' => '&#223;',
    '&agrave;' => '&#224;', '&aacute;' => '&#225;', '&acirc;' => '&#226;', '&atilde;' => '&#227;',
    '&auml;' => '&#228;', '&aring;' => '&#229;', '&aelig;' => '&#230;', '&ccedil;' => '&#231;',
    '&egrave;' => '&#232;', '&eacute;' => '&#233;', '&ecirc;' => '&#234;', '&euml;' => '&#235;',
    '&igrave;' => '&#236;', '&iacute;' => '&#237;', '&icirc;' => '&#238;', '&iuml;' => '&#239;',
    '&eth;' => '&#240;', '&ntilde;' => '&#241;', '&ograve;' => '&#242;', '&oacute;' => '&#243;',
    '&ocirc;' => '&#244;', '&otilde;' => '&#245;', '&ouml;' => '&#246;', '&divide;' => '&#247;',
    '&oslash;' => '&#248;', '&ugrave;' => '&#249;', '&uacute;' => '&#250;', '&ucirc;' => '&#251;',
    '&uuml;' => '&#252;', '&yacute;' => '&#253;', '&thorn;' => '&#254;', '&yuml;' => '&#255;',
    '&fnof;' => '&#402;',
    '&Alpha;' => '&#913;', '&Beta;' => '&#914;', '&Gamma;' => '&#915;', '&Delta;' => '&#916;',
    '&Epsilon;' => '&#917;', '&Zeta;' => '&#918;', '&Eta;' => '&#919;', '&Theta;' => '&#920;',
    '&Iota;' => '&#921;', '&Kappa;' => '&#922;', '&Lambda;' => '&#923;', '&Mu;' => '&#924;',
    '&Nu;' => '&#925;', '&Xi;' => '&#926;', '&Omicron;' => '&#927;', '&Pi;' => '&#928;',
    '&Rho;' => '&#929;', '&Sigma;' => '&#931;', '&Tau;' => '&#932;', '&Upsilon;' => '&#933;',
    '&Phi;' => '&#934;', '&Chi;' => '&#935;', '&Psi;' => '&#936;', '&Omega;' => '&#937;',
    '&alpha;' => '&#945;', '&beta;' => '&#946;', '&gamma;' => '&#947;', '&delta;' => '&#948;',
    '&epsilon;' => '&#949;', '&zeta;' => '&#950;', '&eta;' => '&#951;', '&theta;' => '&#952;',
    '&iota;' => '&#953;', '&kappa;' => '&#954;', '&lambda;' => '&#955;', '&mu;' => '&#956;',
    '&nu;' => '&#957;', '&xi;' => '&#958;', '&omicron;' => '&#959;', '&pi;' => '&#960;',
    '&rho;' => '&#961;', '&sigmaf;' => '&#962;', '&sigma;' => '&#963;', '&tau;' => '&#964;',
    '&upsilon;' => '&#965;', '&phi;' => '&#966;', '&chi;' => '&#967;', '&psi;' => '&#968;',
    '&omega;' => '&#969;', '&thetasym;' => '&#977;', '&upsih;' => '&#978;', '&piv;' => '&#982;',
    '&bull;' => '&#8226;', '&hellip;' => '&#8230;', '&prime;' => '&#8242;', '&Prime;' => '&#8243;',
    '&oline;' => '&#8254;', '&frasl;' => '&#8260;', '&weierp;' => '&#8472;', '&image;' => '&#8465;',
    '&real;' => '&#8476;', '&trade;' => '&#8482;', '&alefsym;' => '&#8501;', '&larr;' => '&#8592;',
    '&uarr;' => '&#8593;', '&rarr;' => '&#8594;', '&darr;' => '&#8595;', '&harr;' => '&#8596;',
    '&crarr;' => '&#8629;', '&lArr;' => '&#8656;', '&uArr;' => '&#8657;', '&rArr;' => '&#8658;',
    '&dArr;' => '&#8659;', '&hArr;' => '&#8660;', '&forall;' => '&#8704;', '&part;' => '&#8706;',
    '&exist;' => '&#8707;', '&empty;' => '&#8709;', '&nabla;' => '&#8711;', '&isin;' => '&#8712;',
    '&notin;' => '&#8713;', '&ni;' => '&#8715;', '&prod;' => '&#8719;', '&sum;' => '&#8721;',
    '&minus;' => '&#8722;', '&lowast;' => '&#8727;', '&radic;' => '&#8730;', '&prop;' => '&#8733;',
    '&infin;' => '&#8734;', '&ang;' => '&#8736;', '&and;' => '&#8743;', '&or;' => '&#8744;',
    '&cap;' => '&#8745;', '&cup;' => '&#8746;', '&int;' => '&#8747;', '&there4;' => '&#8756;',
    '&sim;' => '&#8764;', '&cong;' => '&#8773;', '&asymp;' => '&#8776;', '&ne;' => '&#8800;',
    '&equiv;' => '&#8801;', '&le;' => '&#8804;', '&ge;' => '&#8805;', '&sub;' => '&#8834;',
    '&sup;' => '&#8835;', '&nsub;' => '&#8836;', '&sube;' => '&#8838;', '&supe;' => '&#8839;',
    '&oplus;' => '&#8853;', '&otimes;' => '&#8855;', '&perp;' => '&#8869;', '&sdot;' => '&#8901;',
    '&lceil;' => '&#8968;', '&rceil;' => '&#8969;', '&lfloor;' => '&#8970;', '&rfloor;' => '&#8971;',
    '&lang;' => '&#9001;', '&rang;' => '&#9002;', '&loz;' => '&#9674;', '&spades;' => '&#9824;',
    '&clubs;' => '&#9827;', '&hearts;' => '&#9829;', '&diams;' => '&#9830;', '&quot;' => '&#34;',
    '&amp;' => '&#38;', '&lt;' => '&#60;', '&gt;' => '&#62;', '&OElig;' => '&#338;',
    '&oelig;' => '&#339;', '&Scaron;' => '&#352;', '&scaron;' => '&#353;', '&Yuml;' => '&#376;',
    '&circ;' => '&#710;', '&tilde;' => '&#732;', '&ensp;' => '&#8194;', '&emsp;' => '&#8195;',
    '&thinsp;' => '&#8201;', '&zwnj;' => '&#8204;', '&zwj;' => '&#8205;', '&lrm;' => '&#8206;',
    '&rlm;' => '&#8207;', '&ndash;' => '&#8211;', '&mdash;' => '&#8212;', '&lsquo;' => '&#8216;',
    '&rsquo;' => '&#8217;', '&sbquo;' => '&#8218;', '&ldquo;' => '&#8220;', '&rdquo;' => '&#8221;',
    '&bdquo;' => '&#8222;', '&dagger;' => '&#8224;', '&Dagger;' => '&#8225;', '&permil;' => '&#8240;',
    '&lsaquo;' => '&#8249;', '&rsaquo;' => '&#8250;', '&euro;' => '&#8364;'
);
/** XML parser handle created by the constructor. */
var $_parser;
/** Names of the tags that are currently open, innermost last. */
var $_stack = array();
/** Error messages collected while validating. */
var $_errors = array();

// The properties below used to be created on the fly by the methods that
// write them. Declaring them avoids the dynamic-property deprecation of
// PHP 8.2. Those that are tested with isset() or used in arithmetic are
// left without an initial value so that behaviour stays exactly the same.

/** Line offset added to parser line numbers; assigned by validate(). */
var $_startLine;
/** Highlight details per line number and error message. */
var $_linesToHighlight = array();
/** Number of occurrences per id attribute value; unset until an id is seen. */
var $_tagIdCounter;
/** Tag names that used each id attribute value. */
var $_idTagXref = array();
/** Parser byte index recorded when the last tag was opened. */
var $_start_byte;
/** Parser byte index recorded when a void tag was closed. */
var $_end_byte;
/** Lazily built merge of $_attributes and $_events. */
var $default_values_for_tags;
/**
 * Creates the XML parser and registers this object's handlers on it.
 *
 * The parser keeps tag names in their original case and reports data as
 * UTF-8.
 */
function __construct()
{
    $this->_parser = xml_parser_create('');
    // Call-time pass-by-reference (&$this) is a fatal error since PHP 5.4;
    // objects are passed as handles, so the plain variable is sufficient.
    xml_set_object($this->_parser, $this);
    xml_set_element_handler($this->_parser, 'tagOpen', 'tagClose');
    xml_set_character_data_handler($this->_parser, 'cdata');
    xml_parser_set_option($this->_parser, XML_OPTION_CASE_FOLDING, false);
    xml_parser_set_option($this->_parser, XML_OPTION_TARGET_ENCODING, 'UTF-8');
}

/**
 * PHP 4 style constructor.
 *
 * Kept so that code calling AkXhtmlValidator() explicitly keeps working.
 * PHP 8 no longer treats a method named after the class as a constructor,
 * which is why the set-up now lives in __construct().
 */
function AkXhtmlValidator()
{
    $this->__construct();
}
/**
 * Checks every attribute of a tag.
 *
 * Attributes that are not allowed on the tag are reported as errors;
 * allowed ones are value-checked when the tag defines a rule for them.
 *
 * @param string $tag        Tag name.
 * @param array  $attributes Attribute name => value pairs.
 */
function validateTagAttributes($tag, $attributes = array())
{
    $allowed = $this->getPossibleTagAttributes($tag);
    foreach ($attributes as $attribute => $value) {
        if (in_array($attribute, $allowed)) {
            if ($this->doesAttributeNeedsValidation($tag, $attribute)) {
                $this->validateAttribute($tag, $attribute, $value);
            }
            continue;
        }
        $message = Ak::t("Attribute %attribute can't be used inside &lt;%tag> tags", array(
            '%attribute' => $attribute,
            '%tag' => $tag
        ));
        $this->addError($message, array(array($attribute, $tag)));
    }
}
/**
 * Tells whether a tag defines a value pattern for the attribute or lists
 * it as required.
 *
 * @param string $tag       Tag name.
 * @param string $attribute Attribute name.
 * @return bool
 */
function doesAttributeNeedsValidation($tag, $attribute)
{
    if (isset($this->_tags[$tag]['attributes'][$attribute])) {
        return true;
    }
    if (!isset($this->_tags[$tag]['required'])) {
        return false;
    }
    return in_array($attribute, $this->_tags[$tag]['required']);
}
/**
 * Validates one attribute value against the rules of its tag.
 *
 * A non-empty value must match the tag's pattern for the attribute, if one
 * is defined. An empty value on a required attribute is reported as missing.
 *
 * @param string      $tag       Tag name.
 * @param string      $attribute Attribute name.
 * @param string|null $value     Attribute value.
 */
function validateAttribute($tag, $attribute, $value = null)
{
    $has_pattern = isset($this->_tags[$tag]['attributes'][$attribute]);
    if ($has_pattern && strlen($value) > 0) {
        $pattern = $this->_tags[$tag]['attributes'][$attribute];
        if (!preg_match($pattern, $value)) {
            $message = Ak::t("Invalid value on &lt;%tag %attribute=\"%value\"... Valid values must match the pattern \"%pattern\"", array(
                '%tag' => $tag,
                '%attribute' => $attribute,
                '%value' => $value,
                '%pattern' => htmlentities($pattern)
            ));
            $this->addError($message, array(array($attribute, $value)));
        }
    }

    $is_required = isset($this->_tags[$tag]['required']) && in_array($attribute, $this->_tags[$tag]['required']);
    if ($is_required && strlen($value) == 0) {
        $message = Ak::t("Missing required attribute %attribute on &lt;%tag&gt;", array(
            '%tag' => $tag,
            '%attribute' => $attribute
        ));
        $this->addError($message, array(array($tag, $attribute)));
    }
}
/**
 * Records an error message together with the current line number.
 *
 * @param string $error          Error message.
 * @param array  $highlight_text Phrases to emphasise inside the message.
 */
function addError($error, $highlight_text = array())
{
    $message = $this->highlightError($error, $highlight_text);
    $this->_errors[] = $message.' on line '.$this->getCurrentLine();
}
/**
 * Emphasises phrases inside an error message and remembers, for the current
 * line, which phrase and colour to highlight in the source listing.
 *
 * @param string $error          Error message.
 * @param array  $highlight_text Phrases, or arrays of phrases, to highlight.
 * @return string The message with the phrases wrapped in <strong> markup,
 *                or the untouched message when there is nothing to highlight.
 */
function highlightError($error, $highlight_text = array())
{
    if (empty($highlight_text)) {
        return $error;
    }
    require_once (AK_LIB_DIR.DS.'AkColor.php');
    require_once (AK_LIB_DIR.DS.'AkActionView'.DS.'helpers'.DS.'text_helper.php');

    $current_line = $this->getCurrentLine();
    $result = '';
    foreach ($highlight_text as $entry) {
        // One colour per entry, shared by all phrases of a group.
        $hex = AkColor::getRandomHex();

        if (!is_array($entry)) {
            $result = TextHelper::highlight($error, $entry.' ', ' <strong style="border:2px solid #'.$hex.'; background: #ffc">\1</strong> ');
            $this->_linesToHighlight[$current_line][$error] = array(
                'color' => $hex,
                'phrase' => htmlentities($entry)
            );
            continue;
        }

        $partial = $error;
        foreach ($entry as $phrase) {
            $this->_linesToHighlight[$current_line][$error] = array(
                'color' => $hex,
                'phrase' => htmlentities($phrase)
            );
            $partial = TextHelper::highlight($partial, $phrase.' ', ' <strong style="border:2px solid #'.$hex.'; background: #ffc;">\1</strong> ');
        }
        $result .= $partial;
    }
    return $result;
}
/**
 * Builds an HTML listing of the markup with line numbers in which the
 * phrases recorded for erroneous lines are emphasised.
 *
 * @param string $xhtml Markup that was validated.
 * @return string The HTML listing, or the untouched markup when no line has
 *                been recorded for highlighting.
 */
function highlightErrors($xhtml)
{
    if (empty($this->_linesToHighlight)) {
        return $xhtml;
    }

    $listing = array();
    foreach (preg_split('/\n|\r/', $xhtml) as $index => $raw_line) {
        $line_number = $index + $this->_startLine;
        $escaped = htmlentities($raw_line);
        $row = $line_number."&nbsp;&nbsp;&nbsp;&nbsp;";

        if (!isset($this->_linesToHighlight[$line_number])) {
            $row .= $escaped;
        } else {
            foreach ($this->_linesToHighlight[$line_number] as $details) {
                $row .= TextHelper::highlight($escaped, $details['phrase'], '<strong style="border:2px solid #'.$details['color'].';padding:1px; margin:1px; background: #ffc;">\1</strong>');
            }
        }

        $listing[$index] = $row."<br />\n";
    }

    return empty($listing) ? $xhtml : join($listing);
}
/**
 * Returns the parser's current line number shifted by the start-line offset.
 *
 * @return int
 */
function getCurrentLine()
{
    $parser_line = xml_get_current_line_number($this->_parser);
    return $parser_line + $this->_startLine;
}
/**
 * Runs the duplicated-id check and tells whether any error was collected.
 *
 * When there are errors, $xhtml is replaced by its highlighted listing.
 *
 * @param string $xhtml Validated markup; modified in place on error.
 * @return bool True when at least one error exists.
 */
function hasErrors(&$xhtml)
{
    $this->validateUniquenessOfIds();
    if (count($this->getErrors()) <= 0) {
        return false;
    }
    $xhtml = $this->highlightErrors($xhtml);
    return true;
}
/**
 * Returns the collected error messages with duplicates removed.
 *
 * @return array
 */
function getErrors()
{
    $unique_errors = array_unique($this->_errors);
    return $unique_errors;
}
/**
 * Prints the collected errors as an HTML unordered list.
 */
function showErrors()
{
    $items = join("</li>\n<li>", $this->getErrors());
    echo '<ul><li>'.$items.'</li></ul>';
}
/**
 * Returns the sorted list of attribute names allowed on a tag: its own
 * attributes plus those of the shared attribute and event groups.
 *
 * Results are memoised per tag in a function-level static variable.
 *
 * @param string $tag Tag name.
 * @return array
 */
function getPossibleTagAttributes($tag)
{
    static $cache;
    if (isset($cache[$tag])) {
        return $cache[$tag];
    }

    $own = $this->getUniqueAttributesAndEventsForTag($tag);
    $shared = $this->getDefaultAttributesAndEventsForTag($tag);
    $names = array_unique(array_merge($own, $shared));
    sort($names);

    $cache[$tag] = $names;
    return $cache[$tag];
}
/**
 * Reports an error when a tag lacks any of its required attributes.
 *
 * @param string $tag        Tag name.
 * @param array  $attributes Attribute name => value pairs found on the tag.
 */
function validateRequiredAttributes($tag, $attributes)
{
    $missing = array_diff($this->getCompulsoryTagAttributes($tag), array_keys($attributes));
    if (empty($missing)) {
        return;
    }

    $label = count($missing) == 1 ? 'attribute "' : 'attributes "';
    $message = Ak::t('Tag %tag requires %attributes to be defined', array(
        '%tag' => $tag,
        '%attributes' => $label.join('", "', $missing).'"'
    ));
    $this->addError($message, array($tag));
}
/**
 * Tracks the id attribute of a tag and reports an error as soon as an id
 * value is seen for the second time.
 *
 * @param string $tag        Tag name.
 * @param array  $attributes Attribute name => value pairs found on the tag.
 */
function protectFromDuplicatedIds($tag, $attributes)
{
    if (!isset($attributes['id'])) {
        return;
    }
    $id = $attributes['id'];

    if (isset($this->_idTagXref[$id])) {
        $this->addError(Ak::t('Repeating id %id', array('%id' => $id)), array($id));
    }

    // Count the occurrence and remember which tag carried the id.
    if (isset($this->_tagIdCounter[$id])) {
        $this->_tagIdCounter[$id] = $this->_tagIdCounter[$id]+1;
    } else {
        $this->_tagIdCounter[$id] = 1;
    }
    $this->_idTagXref[$id][] = $tag;
}
/**
 * Adds one summary error for every id value that was used more than once.
 *
 * The message names the id, how often it occurred and the tags that used
 * it. Nothing happens when no id has been recorded.
 */
function validateUniquenessOfIds()
{
    // empty() also covers the "never set" case and avoids calling max()
    // on an empty array.
    if (empty($this->_tagIdCounter)) {
        return;
    }

    foreach ($this->_tagIdCounter as $id => $count) {
        if ($count <= 1) {
            continue;
        }
        $tags = $this->_idTagXref[$id];
        // Both branches used to read 'tag "', so the plural was never used.
        $label = count($tags) == 1 ? 'tag "' : 'tags "';
        $this->addError(Ak::t('You have repeated the id %id %count times on your xhtml code. Duplicated Ids found on %tags', array(
            '%id' => "\"$id\"",
            '%count' => $count,
            '%tags' => $label.join('", "', $tags).'"'
        )));
    }
}
/**
 * Returns the attributes a tag must carry.
 *
 * @param string $tag Tag name.
 * @return array Required attribute names; empty when the tag has none.
 */
function getCompulsoryTagAttributes($tag)
{
    if (empty($this->_tags[$tag]['required'])) {
        return array();
    }
    return (array)$this->_tags[$tag]['required'];
}
/**
 * Returns the attribute names a tag defines itself.
 *
 * Numerically keyed entries hold the name as value; string-keyed entries
 * hold the name as key.
 *
 * @param string $tag Tag name.
 * @return array
 */
function getUniqueAttributesAndEventsForTag($tag)
{
    $names = array();
    if (!isset($this->_tags[$tag]['attributes']) || !is_array($this->_tags[$tag]['attributes'])) {
        return $names;
    }
    foreach ($this->_tags[$tag]['attributes'] as $key => $value) {
        if (is_numeric($key)) {
            $names[] = $value;
        } else {
            $names[] = $key;
        }
    }
    return $names;
}
/**
 * Returns the attribute and event names a tag inherits from the shared
 * groups.
 *
 * A group is skipped when the tag is listed under its 'except' key or when
 * the group has an 'only' list that does not contain the tag. Unknown tags
 * inherit nothing.
 *
 * @param string $tag Tag name.
 * @return array Attribute names; may contain duplicates.
 */
function getDefaultAttributesAndEventsForTag($tag)
{
    $default = array();
    if (!isset($this->_tags[$tag]) && !in_array($tag, $this->_tags)) {
        return $default;
    }

    foreach ($this->getDefaultAttributesAndEventsForTags() as $defaults) {
        $excluded = isset($defaults['except']) && in_array($tag, $defaults['except']);
        $not_included = isset($defaults['only']) && !in_array($tag, $defaults['only']);
        if ($excluded || $not_included) {
            continue;
        }

        if (isset($defaults['attributes'])) {
            $candidates = $defaults['attributes'];
        } elseif (isset($defaults['events'])) {
            $candidates = $defaults['events'];
        } else {
            // A group without either key contributes nothing.
            $candidates = array();
        }

        foreach ($candidates as $k => $candidate) {
            // The name is the key for string-keyed entries, whatever their
            // value is. Testing is_array($candidate) would have emitted a
            // validation pattern as if it were an attribute name.
            $default[] = is_numeric($k) ? $candidate : $k;
        }
    }
    return $default;
}
/**
 * Returns the shared attribute and event groups merged into one array.
 *
 * The merge is computed on first use and kept on the instance.
 *
 * NOTE(review): $_attributes and $_events both contain a 'keyboard' key, so
 * array_merge() keeps only the entry from $_events. Confirm this is intended.
 *
 * @return array
 */
function getDefaultAttributesAndEventsForTags()
{
    if (isset($this->default_values_for_tags)) {
        return $this->default_values_for_tags;
    }
    $this->default_values_for_tags = array_merge($this->_attributes, $this->_events);
    return $this->default_values_for_tags;
}
/**
 * Returns the sorted names of all known tags.
 *
 * @return array
 */
function getAvailableTags()
{
    $names = array();
    foreach ($this->_tags as $key => $definition) {
        // Tags without rules are stored as plain values under numeric keys.
        $names[] = is_numeric($key) ? $definition : $key;
    }
    sort($names);
    return $names;
}
/**
 * Validates a piece of XHTML.
 *
 * A working copy has its DOCTYPE and CDATA sections removed and its named
 * entities converted, is wrapped in an artificial <all> root element and is
 * then parsed. On failure $xhtml is replaced by a highlighted listing.
 *
 * @param string $xhtml Markup to validate; modified in place on error.
 * @return bool True when no error was found.
 */
function validate(&$xhtml)
{
    $this->_startLine = 1;
    $xhtml_copy = $this->removeDoctypeHeader($xhtml);
    $xhtml_copy = $this->removeCdata($xhtml_copy);
    $xhtml_copy = $this->convertLiteralEntitiesToNumericalEntities($xhtml_copy);
    $xhtml_copy = '<all>'.$xhtml_copy.'</all>';

    // The whole document is passed in one call, so mark it as the final
    // chunk; otherwise the parser is never told that the input is complete
    // and errors that are only detectable at the end can go unreported.
    if (!xml_parse($this->_parser, $xhtml_copy, true)) {
        $this->addError(Ak::t('XHTML is not well-formed.') .' '.xml_error_string(xml_get_error_code($this->_parser)));
    }
    return !$this->hasErrors($xhtml);
}
/**
 * Strips a leading <!DOCTYPE ...> declaration from the markup.
 *
 * When a declaration is removed, the start-line offset is set to one plus
 * the number of line breaks inside the declaration. For a single-line
 * declaration that is the same offset as before (1); a multi-line one now
 * shifts the offset by the lines that were removed.
 *
 * Changes compared to the previous version:
 *  - count() was applied to the integer returned by substr_count(), which
 *    always gave 1 and raises a TypeError on PHP 8;
 *  - the closing '>' of the declaration was left in the returned text.
 *
 * NOTE(review): the declaration is assumed to end at the first '>'; a
 * DOCTYPE with an internal subset containing '>' is not handled.
 *
 * @param string $xhtml Markup.
 * @return string Markup without the leading DOCTYPE declaration.
 */
function removeDoctypeHeader($xhtml)
{
    if (substr($xhtml, 0, 9) != '<!DOCTYPE') {
        return $xhtml;
    }

    $end = strpos($xhtml, '>');
    if ($end === false) {
        // Unterminated declaration: nothing can be removed safely.
        $this->_startLine = 1;
        return $xhtml;
    }

    $doctype = substr($xhtml, 0, $end + 1);
    $this->_startLine = 1 + substr_count($doctype, "\n");
    return (string)substr($xhtml, $end + 1);
}
/**
 * Removes CDATA sections, including their content, from the markup.
 *
 * Each section is matched on its own (non-greedy). The previous greedy
 * pattern removed everything between the first '<![CDATA[' and the last
 * ']]>', so markup between two sections was lost. Any unpaired CDATA
 * delimiter that remains is stripped afterwards.
 *
 * @param string $xhtml Markup.
 * @return string Markup without CDATA sections.
 */
function removeCdata($xhtml)
{
    $stripped = preg_replace('/<!\[CDATA\[.*?\]\]>/s', '', $xhtml);
    if ($stripped === null) {
        // preg_replace() returns null on a PCRE error (e.g. backtrack
        // limit); keep the input rather than passing null on.
        $stripped = $xhtml;
    }
    return str_replace(array('<![CDATA[', ']]>'), '', $stripped);
}
/**
 * Replaces named character entities by their numeric character references.
 *
 * @param string $xhtml Markup.
 * @return string Markup with the known named entities converted.
 */
function convertLiteralEntitiesToNumericalEntities($xhtml)
{
    $named = array_keys($this->_entities);
    $numeric = array_values($this->_entities);
    return str_replace($named, $numeric, $xhtml);
}
/**
 * Start-element handler of the XML parser.
 *
 * Records the byte index of the tag, checks required attributes and
 * duplicated ids, then validates the tag against its parent: unknown tags
 * are reported, as are tags that are not allowed inside the parent. The tag
 * is always pushed on the stack of open tags.
 *
 * @param resource $parser     XML parser.
 * @param string   $tag        Name of the opened tag.
 * @param array    $attributes Attribute name => value pairs.
 */
function tagOpen($parser, $tag, $attributes)
{
    $this->_start_byte = xml_get_current_byte_index($parser);

    // The artificial root element is only tracked, never validated.
    if ($tag == 'all') {
        $this->_stack[] = 'all';
        return;
    }

    $previous = $this->_stack[count($this->_stack) -1];
    $this->validateRequiredAttributes($tag, $attributes);
    $this->protectFromDuplicatedIds($tag, $attributes);

    $known_tags = $this->getAvailableTags();

    // Below an unknown parent only the attributes are checked.
    if (!in_array($previous, $known_tags)) {
        $this->validateTagAttributes($tag, $attributes);
        $this->_stack[] = $tag;
        return;
    }

    if (!in_array($tag, $known_tags)) {
        $message = Ak::t("Illegal tag: <code>%tag</code>", array(
            '%tag' => $tag
        ));
        $this->addError($message, array($tag));
        $this->_stack[] = $tag;
        return;
    }

    // Is tag allowed in the current context?
    if ($previous != 'all' && !$this->isTagAlowedOnCurrentContext($tag, $previous)) {
        $message = Ak::t("Tag %tag is not allowed within tag %previous", array(
            '%tag' => $tag,
            '%previous' => $previous
        ));
        $this->addError($message, array($tag));
    }

    $this->validateTagAttributes($tag, $attributes);
    $this->_stack[] = $tag;
}
/**
 * Tells whether a tag may appear directly inside another tag.
 *
 * A parent without containment rules accepts every child.
 *
 * NOTE(review): getInverseRulesForTag() appears to return a plain list of
 * container names, so the lookup by tag name below looks like it can never
 * succeed and the inverse rules are effectively unused. Confirm before
 * changing; behaviour is kept as is.
 *
 * @param string $tag      Child tag name (or '#pcdata').
 * @param string $previous Parent tag name.
 * @return bool
 */
function isTagAlowedOnCurrentContext($tag, $previous)
{
    $rules = $this->getRules();
    $allowed = true;
    if (isset($rules[$previous])) {
        $allowed = in_array($tag, $rules[$previous]);
    }

    $containers = $this->getInverseRulesForTag($tag);
    if (isset($containers[$tag])) {
        $allowed = in_array($previous, $containers[$tag]);
    }
    return $allowed;
}
/**
 * Returns the containment rules: for each container tag, the list of child
 * tags (and '#pcdata' for character data) that may appear directly inside it.
 *
 * The table is built on the first call and kept in a function-level static
 * variable.
 *
 * NOTE(review): 'tfoot' is not listed among the children of 'table' and has
 * no entry of its own in this table -- confirm whether that is intended.
 *
 * @return array Map of container tag name => array of allowed child names.
 */
1128 function getRules()
1130 static $rules;
1131 if (!isset($rules)) {
1132 //$inline = array ('abbr','cite','code','dfn','em','kbd','object','quote','q','samp','span','strong','var','a','sup','sub','acronym','img','#PCDATA');
// Inline-level content, including raw character data ('#pcdata').
1133 $inline = array(
1134 '#pcdata',
1135 'a',
1136 'abbr',
1137 'acronym',
1138 'applet',
1139 'b',
1140 'basefont',
1141 'bdo',
1142 'big',
1143 'br',
1144 'button',
1145 'cite',
1146 'code',
1147 'dfn',
1148 'em',
1149 'font',
1150 'i',
1151 'img',
1152 'input',
1153 'kbd',
1154 'label',
1155 'map',
1156 'object',
1157 'q',
1158 's',
1159 'samp',
1160 'select',
1161 'small',
1162 'span',
1163 'strike',
1164 'strong',
1165 'sub',
1166 'sup',
1167 'textarea',
1168 'tt',
1169 'u',
1170 'var'
1172 //$block = array('dl','nl','ol','ul','address','blockcode','blockquote','div','p','pre','handler','section','separator','table');
// Block-level content.
1173 $block = array(
1174 'address',
1175 'blockcode',
1176 'blockquote',
1177 'center',
1178 'dir',
1179 'div',
1180 'dl',
1181 'fieldset',
1182 'form',
1183 'h1',
1184 'h2',
1185 'h3',
1186 'h4',
1187 'h5',
1188 'h6',
1189 'handler',
1190 'hr',
1191 'iframe',
1192 'isindex',
1193 'menu',
1194 'nl',
1195 'noframes',
1196 'script',
1197 'noscript',
1198 'ol',
1199 'p',
1200 'pre',
1201 'section',
1202 'separator',
1203 'table',
1204 'ul'
// "Flow" content is anything block-level or inline-level.
1206 $flow = array_merge($block, $inline);
// Explicit rules for containers with a specific content model.
1207 $rules = array(
1208 'html' => array(
1209 'head',
1210 'body'
1212 'head' => array(
1213 'script',
1214 'style',
1215 'meta',
1216 'base',
1217 'link',
1218 'title'
1220 'body' => array_merge(array(
1221 'ins',
1222 'del'
1223 ) , $flow) ,
1224 'ul' => array(
1225 'li'
1227 'ol' => array(
1228 'li'
1230 //'p' => array_merge($inline, array('blockcode', 'blockquote', 'pre', 'table', 'dl', 'nl', 'ol', 'ul')),
1231 'blockquote' => $block,
1232 'dl' => array(
1233 'dt',
1234 'dd'
1236 'pre' => array_diff($inline, array(
1237 'img',
1238 'object',
1239 'big',
1240 'small',
1241 'sub',
1242 'sup'
1243 )) ,
1244 'form' => array_diff($flow, array(
1245 'form'
1246 )) ,
1247 // Tables
1248 'table' => array(
1249 'caption',
1250 'colgroup',
1251 'col',
1252 'thead',
1253 'tbody',
1254 'tr'
1256 'colgroup' => array(
1257 'col'
1259 'thead' => array(
1260 'tr'
1262 'tbody' => array(
1263 'tr'
1265 'tr' => array(
1266 'th',
1267 'td'
1269 'address' => array_merge($inline, array(
1271 )) ,
1272 'fieldset' => array_merge($flow, array(
1273 'legend'
1274 )) ,
1275 'a' => array_diff($inline, array(
1277 )) ,
1278 'object' => array_merge($flow, array(
1279 'param'
1280 )) ,
1281 'map' => array_merge($block, array(
1282 'area'
1283 )) ,
1284 'select' => array(
1285 'optgroup',
1286 'option'
1288 'optgroup' => array(
1289 'option'
1291 'label' => array_diff($inline, array(
1292 'label'
1293 )) ,
1294 'button' => array_diff($flow, array(
1295 'a',
1296 'input',
1297 'select',
1298 'textarea',
1299 'label',
1300 'button',
1301 'form',
1302 'fieldset',
1303 'iframe'
1304 )) ,
// Containers that accept any flow content.
1306 $flow_tags = array(
1307 'div',
1308 'center',
1309 'blockquote',
1310 'script',
1311 'noscript',
1312 'dd',
1313 'li',
1314 'th',
1315 'td'
// These assignments replace any rule set above for the same tag
// (e.g. 'blockquote').
1317 foreach($flow_tags as $flow_tag) {
1318 $rules[$flow_tag] = $flow;
// Containers that accept inline content only.
1320 $inline_tags = array(
1321 'p',
1322 'h1',
1323 'h2',
1324 'h3',
1325 'h4',
1326 'h5',
1327 'h6',
1328 'dt',
1329 'caption',
1330 'legend',
1331 'tt',
1332 'abbr',
1333 'acronym',
1334 'b',
1335 'bdo',
1336 'big',
1337 'cite',
1338 'code',
1339 'dfn',
1340 'em',
1341 'font',
1342 'i',
1343 'kbd',
1344 'q',
1345 's',
1346 'samp',
1347 'small',
1348 'span',
1349 'strike',
1350 'strong',
1351 'sub',
1352 'sup',
1353 'u',
1354 'var'
1356 foreach($inline_tags as $inline_tag) {
1357 $rules[$inline_tag] = $inline;
1360 return $rules;
/**
 * Returns the container tags whose rules list the given tag as a child.
 *
 * Results are memoised per tag in a function-level static variable.
 *
 * @param string $tag Tag name.
 * @return array List of container tag names; empty when none lists the tag.
 */
function getInverseRulesForTag($tag)
{
    static $inverse_rules;
    if (isset($inverse_rules[$tag])) {
        return $inverse_rules[$tag];
    }

    $inverse_rules[$tag] = array();
    foreach ($this->getRules() as $container_tag => $children) {
        if (in_array($tag, $children)) {
            $inverse_rules[$tag][] = $container_tag;
        }
    }
    return $inverse_rules[$tag];
}
/**
 * Character-data handler of the XML parser.
 *
 * Reports content found inside tags that must be empty, and non-blank text
 * inside known tags that do not accept raw character data.
 *
 * @param resource $parser XML parser.
 * @param string   $cdata  Character data.
 */
function cdata($parser, $cdata)
{
    $void_tags = array(
        'base', 'area', 'basefont', 'br', 'col', 'hr',
        'img', 'input', 'link', 'meta', 'param'
    );

    // Simply check that the 'previous' tag allows CDATA
    $previous = $this->_stack[count($this->_stack) -1];
    if ($cdata != '' && in_array($previous, $void_tags)) {
        $message = Ak::t("%previous tag is not a content tag. close it like this '&lt;%previous /&gt;'", array(
            '%previous' => $previous
        ));
        $this->addError($message, array($previous));
    }

    // If previous tag is illegal, no point in running test
    if (!in_array($previous, $this->getAvailableTags())) {
        return;
    }

    // Whitespace-only data is never reported.
    if (trim($cdata) == '') {
        return;
    }
    if (!$this->isTagAlowedOnCurrentContext('#pcdata', $previous)) {
        $message = Ak::t("Tag <code>%previous</code> may not contain raw character data", array(
            '%previous' => $previous
        ));
        $this->addError($message, array($previous));
    }
}
/**
 * End-element handler of the XML parser.
 *
 * For tags that must be empty, an error is added when the parser's byte
 * index advanced by exactly 4 between the opening and the closing of the
 * tag. The tag is then removed from the stack of open tags.
 *
 * @param resource $parser XML parser.
 * @param string   $tag    Name of the closed tag.
 */
function tagClose($parser, $tag)
{
    $void_tags = array(
        'base', 'area', 'basefont', 'br', 'col', 'hr',
        'img', 'input', 'link', 'meta', 'param'
    );

    if (in_array($tag, $void_tags)) {
        $this->_end_byte = xml_get_current_byte_index($parser);
        $distance = $this->_end_byte-$this->_start_byte;
        if ($distance == 4) {
            $message = Ak::t("%tag tag is not a content tag. close it like this '&lt;%tag /&gt;'", array(
                '%tag' => $tag
            ));
            $this->addError($message, array($tag));
        }
    }

    array_pop($this->_stack);
}