Rename newline normalization directive to something better.
[htmlpurifier.git] / tests / HTMLPurifier / AttrDef / LangTest.php
blobc59175556af1d2fa330fea0f334c0c324ea2bc82
1 <?php
/**
 * Test case for the language-code attribute definition
 * (HTMLPurifier_AttrDef_Lang).
 *
 * NOTE(review): assertDef() is inherited from HTMLPurifier_AttrDefHarness,
 * which is not visible in this file. Judging by the cases below, its second
 * argument looks like the expected result (a string for the corrected value,
 * false for a rejected value, omitted or true for "accepted as given");
 * confirm against the harness.
 */
class HTMLPurifier_AttrDef_LangTest extends HTMLPurifier_AttrDefHarness
{

    /**
     * Runs every language-code case against a fresh
     * HTMLPurifier_AttrDef_Lang instance.
     */
    function test() {

        $this->def = new HTMLPurifier_AttrDef_Lang();

        // --- Basic well-formed codes ---------------------------------
        foreach (array('en', 'en-us') as $code) {
            $this->assertDef($code);
        }

        $this->assertDef(' en ', 'en'); // surrounding whitespace is trimmed
        $this->assertDef('EN', 'en');   // matching is case-insensitive

        // (thanks Eugen Pankratz for noticing the typos!)
        $this->assertDef('En-Us-Edison', 'en-us-edison'); // mixed case, several subtags

        $this->assertDef('fr en', false); // more than one language
        $this->assertDef('%', false);     // illegal character

        // A language that is too long for the syntax.
        $this->assertDef('thisistoolongsoitgetscut', false);

        // --- Primary subtag ------------------------------------------
        // "x" and "i" almost never appear as bare primary codes in
        // practice. In theory, though, using them alone is permissible,
        // so we stay lenient; no XML parser is going to complain anyway.
        // The last two entries are the real-world forms.
        foreach (array('x', 'i', 'x-klingon', 'i-mingo') as $code) {
            $this->assertDef($code);
        }

        // The RFC defines only two- and three-letter primary codes, so
        // a primary code of four or more letters is invalid even though
        // the syntax allows 1 to 8 characters. The RFC says this
        // reservation exists so future versions can expand; adopting a
        // new RFC will therefore mean rewriting these cases, even if
        // backwards-compatibility is largely retained (i.e. this is
        // not forwards compatible).
        $this->assertDef('four', false);
        // For the same reason, every other one-letter language is refused.
        $this->assertDef('f', false);

        // --- Second subtag -------------------------------------------
        // One-letter subtags are prohibited until the RFC is revised,
        // although this is less volatile than the primary subtag
        // restrictions. This case also covers the fix-up behavior:
        // subtags are chopped off until a valid language code remains.
        $this->assertDef('en-a', 'en');
        // "x" is the exception: a reserved single-letter subtag that is
        // allowed.
        $this->assertDef('en-x', 'en-x');
        // Subtags of 2-8 characters are permitted. They carry special
        // meaning, but checking it would require country code lookup
        // tables (for two characters) or special registration tables
        // (for anything longer).
        $this->assertDef('en-uk', true);

        // --- Further subtags: syntactic constraints only -------------
        $this->assertDef('en-us-edison');
        $this->assertDef('en-us-toolonghaha', 'en-us');
        $this->assertDef('en-us-a-silly-long-one');

        // RFC 3066 stipulates that when both a three-letter and a
        // two-letter code are available, the two-letter one MUST be
        // used. That cannot be implemented without a language code
        // lookup table.

        // --- Special codes -------------------------------------------
        // "und": HTML technically lets you omit the language tag, but
        // that implicitly means the parent element's language applies,
        // which is incorrect in some cases. So "und" is allowed, only
        // slightly defying the RFC's SHOULD NOT designation.
        //
        // "mul": because attributes only allow one language, "mul" is
        // allowed, complying with the RFC's SHOULD NOT designation.
        foreach (array('und', 'mul') as $code) {
            $this->assertDef($code);
        }

    }

}
85 // vim: et sw=4 sts=4