Add "Package:" file headers to denote built-in packages.
[emacs.git] / lisp / progmodes / ebnf-dtd.el
blob2ca38406d4f86fb6c35bfbc02b47c34e85c619ab
1 ;;; ebnf-dtd.el --- parser for DTD (Document Type Definition for XML)
3 ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 ;; Free Software Foundation, Inc.
6 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
7 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
8 ;; Keywords: wp, ebnf, PostScript
9 ;; Version: 1.1
10 ;; Package: ebnf2ps
12 ;; This file is part of GNU Emacs.
14 ;; GNU Emacs is free software: you can redistribute it and/or modify
15 ;; it under the terms of the GNU General Public License as published by
16 ;; the Free Software Foundation, either version 3 of the License, or
17 ;; (at your option) any later version.
19 ;; GNU Emacs is distributed in the hope that it will be useful,
20 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
21 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 ;; GNU General Public License for more details.
24 ;; You should have received a copy of the GNU General Public License
25 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
27 ;;; Commentary:
29 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32 ;; This is part of ebnf2ps package.
34 ;; This package defines a parser for DTD (Document Type Definition for XML).
36 ;; See ebnf2ps.el for documentation.
39 ;; DTD Syntax
40 ;; ----------
42 ;; See the URLs:
43 ;; `http://www.w3.org/TR/2004/REC-xml-20040204/'
44 ;; (Extensible Markup Language (XML) 1.0 (Third Edition))
45 ;; `http://www.w3.org/TR/html40/'
46 ;; (HTML 4.01 Specification)
47 ;; `http://www.w3.org/TR/NOTE-html-970421'
48 ;; (HTML DTD with support for Style Sheets)
51 ;; /* Document */
53 ;; document ::= prolog element Misc*
54 ;; /* Note that *only* the prolog will be parsed */
57 ;; /* Characters */
59 ;; Char ::= #x9 | #xA | #xD
60 ;; | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
61 ;; /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
63 ;; /* NOTE:
65 ;; Document authors are encouraged to avoid "compatibility characters", as
66 ;; defined in section 6.8 of [Unicode] (see also D21 in section 3.6 of
67 ;; [Unicode3]). The characters defined in the following ranges are also
68 ;; discouraged. They are either control characters or permanently undefined
69 ;; Unicode characters:
71 ;; [#x7F-#x84], [#x86-#x9F], [#xFDD0-#xFDDF],
72 ;; [#x1FFFE-#x1FFFF], [#x2FFFE-#x2FFFF], [#x3FFFE-#x3FFFF],
73 ;; [#x4FFFE-#x4FFFF], [#x5FFFE-#x5FFFF], [#x6FFFE-#x6FFFF],
74 ;; [#x7FFFE-#x7FFFF], [#x8FFFE-#x8FFFF], [#x9FFFE-#x9FFFF],
75 ;; [#xAFFFE-#xAFFFF], [#xBFFFE-#xBFFFF], [#xCFFFE-#xCFFFF],
76 ;; [#xDFFFE-#xDFFFF], [#xEFFFE-#xEFFFF], [#xFFFFE-#xFFFFF],
77 ;; [#x10FFFE-#x10FFFF]. */
80 ;; /* White Space */
82 ;; S ::= (#x20 | #x9 | #xD | #xA)+
85 ;; /* Names and Tokens */
87 ;; NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
88 ;; | CombiningChar | Extender
90 ;; Name ::= (Letter | '_' | ':') (NameChar)*
92 ;; Names ::= Name (#x20 Name)*
94 ;; Nmtoken ::= (NameChar)+
96 ;; Nmtokens ::= Nmtoken (#x20 Nmtoken)*
99 ;; /* Literals */
101 ;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
102 ;; | "'" ([^%&'] | PEReference | Reference)* "'"
104 ;; AttValue ::= '"' ([^<&"] | Reference)* '"'
105 ;; | "'" ([^<&'] | Reference)* "'"
107 ;; SystemLiteral ::= ('"' [^"]* '"')
108 ;; | ("'" [^']* "'")
110 ;; PubidLiteral ::= '"' PubidChar* '"'
111 ;; | "'" (PubidChar - "'")* "'"
113 ;; PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
115 ;; /* NOTE:
117 ;; Although the EntityValue production allows the definition of a general
118 ;; entity consisting of a single explicit < in the literal (e.g., <!ENTITY
119 ;; mylt "<">), it is strongly advised to avoid this practice since any
120 ;; reference to that entity will cause a well-formedness error. */
123 ;; /* Character Data */
125 ;; CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
128 ;; /* Comments */
130 ;; Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
133 ;; /* Processing Instructions */
135 ;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
137 ;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
140 ;; /* CDATA Sections */
142 ;; CDSect ::= CDStart CData CDEnd
144 ;; CDStart ::= '<![CDATA['
146 ;; CData ::= (Char* - (Char* ']]>' Char*))
148 ;; CDEnd ::= ']]>'
151 ;; /* Prolog */
153 ;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
155 ;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
157 ;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
159 ;; Eq ::= S? '=' S?
161 ;; VersionNum ::= '1.0'
163 ;; Misc ::= Comment | PI | S
166 ;; /* Document Type Definition */
168 ;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
169 ;; ('[' intSubset ']' S?)? '>'
170 ;; [VC: Root Element Type]
171 ;; [WFC: External Subset]
173 ;; DeclSep ::= PEReference | S
174 ;; [WFC: PE Between Declarations]
176 ;; intSubset ::= (markupdecl | DeclSep)*
178 ;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl
179 ;; | NotationDecl | PI | Comment
180 ;; [VC: Proper Declaration/PE Nesting]
181 ;; [WFC: PEs in Internal Subset]
184 ;; /* External Subset */
186 ;; extSubset ::= TextDecl? extSubsetDecl
188 ;; extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
191 ;; /* Standalone Document Declaration */
193 ;; SDDecl ::= S 'standalone' Eq
194 ;; (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
195 ;; [VC: Standalone Document Declaration]
198 ;; /* Element */
200 ;; element ::= EmptyElemTag | STag content ETag
201 ;; [WFC: Element Type Match]
202 ;; [VC: Element Valid]
205 ;; /* Start-tag */
207 ;; STag ::= '<' Name (S Attribute)* S? '>'
208 ;; [WFC: Unique Att Spec]
210 ;; Attribute ::= Name Eq AttValue
211 ;; [VC: Attribute Value Type]
212 ;; [WFC: No External Entity References]
213 ;; [WFC: No < in Attribute Values]
216 ;; /* End-tag */
218 ;; ETag ::= '</' Name S? '>'
221 ;; /* Content of Elements */
223 ;; content ::= CharData?
224 ;; ((element | Reference | CDSect | PI | Comment) CharData?)*
227 ;; /* Tags for Empty Elements */
229 ;; EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
230 ;; [WFC: Unique Att Spec]
233 ;; /* Element Type Declaration */
235 ;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
236 ;; [VC: Unique Element Type Declaration]
238 ;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
241 ;; /* Element-content Models */
243 ;; children ::= (choice | seq) ('?' | '*' | '+')?
245 ;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
247 ;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
248 ;; [VC: Proper Group/PE Nesting]
250 ;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
251 ;; [VC: Proper Group/PE Nesting]
254 ;; /* Mixed-content Declaration */
256 ;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
257 ;; | '(' S? '#PCDATA' S? ')'
258 ;; [VC: Proper Group/PE Nesting]
259 ;; [VC: No Duplicate Types]
262 ;; /* Attribute-list Declaration */
264 ;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
266 ;; AttDef ::= S Name S AttType S DefaultDecl
269 ;; /* Attribute Types */
271 ;; AttType ::= StringType | TokenizedType | EnumeratedType
273 ;; StringType ::= 'CDATA'
275 ;; TokenizedType ::= 'ID' [VC: ID]
276 ;; [VC: One ID per Element Type]
277 ;; [VC: ID Attribute Default]
278 ;; | 'IDREF' [VC: IDREF]
279 ;; | 'IDREFS' [VC: IDREF]
280 ;; | 'ENTITY' [VC: Entity Name]
281 ;; | 'ENTITIES' [VC: Entity Name]
282 ;; | 'NMTOKEN' [VC: Name Token]
283 ;; | 'NMTOKENS' [VC: Name Token]
286 ;; /* Enumerated Attribute Types */
288 ;; EnumeratedType ::= NotationType | Enumeration
290 ;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
291 ;; [VC: Notation Attributes]
292 ;; [VC: One Notation Per Element Type]
293 ;; [VC: No Notation on Empty Element]
294 ;; [VC: No Duplicate Tokens]
296 ;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
297 ;; [VC: Enumeration]
298 ;; [VC: No Duplicate Tokens]
301 ;; /* Attribute Defaults */
303 ;; DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
304 ;; | (('#FIXED' S)? AttValue)
305 ;; [VC: Required Attribute]
306 ;; [VC: Attribute Default Value Syntactically Correct]
307 ;; [WFC: No < in Attribute Values]
308 ;; [VC: Fixed Attribute Default]
311 ;; /* Conditional Section */
313 ;; conditionalSect ::= includeSect | ignoreSect
315 ;; includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
316 ;; [VC: Proper Conditional Section/PE Nesting]
318 ;; ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
319 ;; [VC: Proper Conditional Section/PE Nesting]
321 ;; ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
323 ;; Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
326 ;; /* Character Reference */
328 ;; CharRef ::= '&#' [0-9]+ ';'
329 ;; | '&#x' [0-9a-fA-F]+ ';'
330 ;; [WFC: Legal Character]
333 ;; /* Entity Reference */
335 ;; Reference ::= EntityRef | CharRef
337 ;; EntityRef ::= '&' Name ';'
338 ;; [WFC: Entity Declared]
339 ;; [VC: Entity Declared]
340 ;; [WFC: Parsed Entity]
341 ;; [WFC: No Recursion]
343 ;; PEReference ::= '%' Name ';'
344 ;; [VC: Entity Declared]
345 ;; [WFC: No Recursion]
346 ;; [WFC: In DTD]
349 ;; /* Entity Declaration */
351 ;; EntityDecl ::= GEDecl | PEDecl
353 ;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
355 ;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
357 ;; EntityDef ::= EntityValue | (ExternalID NDataDecl?)
359 ;; PEDef ::= EntityValue | ExternalID
362 ;; /* External Entity Declaration */
364 ;; ExternalID ::= 'SYSTEM' S SystemLiteral
365 ;; | 'PUBLIC' S PubidLiteral S SystemLiteral
367 ;; NDataDecl ::= S 'NDATA' S Name
368 ;; [VC: Notation Declared]
371 ;; /* Text Declaration */
373 ;; TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
376 ;; /* Well-Formed External Parsed Entity */
378 ;; extParsedEnt ::= TextDecl? content
381 ;; /* Encoding Declaration */
383 ;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
385 ;; EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
386 ;; /* Encoding name contains only Latin characters */
389 ;; /* Notation Declarations */
391 ;; NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
392 ;; [VC: Unique Notation Name]
394 ;; PublicID ::= 'PUBLIC' S PubidLiteral
397 ;; /* Characters */
399 ;; Letter ::= BaseChar | Ideographic
401 ;; BaseChar ::= [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6]
402 ;; | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131]
403 ;; | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E]
404 ;; | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5]
405 ;; | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1]
406 ;; | #x0386 | [#x0388-#x038A] | #x038C
407 ;; | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6]
408 ;; | #x03DA | #x03DC | #x03DE
409 ;; | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C]
410 ;; | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481]
411 ;; | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC]
412 ;; | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9]
413 ;; | [#x0531-#x0556] | #x0559 | [#x0561-#x0586]
414 ;; | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A]
415 ;; | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE]
416 ;; | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5
417 ;; | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D
418 ;; | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990]
419 ;; | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2
420 ;; | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1]
421 ;; | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10]
422 ;; | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33]
423 ;; | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C]
424 ;; | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B]
425 ;; | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8]
426 ;; | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9]
427 ;; | #x0ABD | #x0AE0 | [#x0B05-#x0B0C]
428 ;; | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30]
429 ;; | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D
430 ;; | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A]
431 ;; | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A]
432 ;; | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4]
433 ;; | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9]
434 ;; | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28]
435 ;; | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61]
436 ;; | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8]
437 ;; | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE
438 ;; | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10]
439 ;; | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61]
440 ;; | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33]
441 ;; | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84
442 ;; | [#x0E87-#x0E88] | #x0E8A | #x0E8D
443 ;; | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3]
444 ;; | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB]
445 ;; | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3]
446 ;; | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47]
447 ;; | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6]
448 ;; | #x1100 | [#x1102-#x1103] | [#x1105-#x1107]
449 ;; | #x1109 | [#x110B-#x110C] | [#x110E-#x1112]
450 ;; | #x113C | #x113E | #x1140
451 ;; | #x114C | #x114E | #x1150
452 ;; | [#x1154-#x1155] | #x1159 | [#x115F-#x1161]
453 ;; | #x1163 | #x1165 | #x1167
454 ;; | #x1169 | [#x116D-#x116E] | [#x1172-#x1173]
455 ;; | #x1175 | #x119E | #x11A8
456 ;; | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8]
457 ;; | #x11BA | [#x11BC-#x11C2] | #x11EB
458 ;; | #x11F0 | #x11F9 | [#x1E00-#x1E9B]
459 ;; | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D]
460 ;; | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57]
461 ;; | #x1F59 | #x1F5B | #x1F5D
462 ;; | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC]
463 ;; | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC]
464 ;; | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC]
465 ;; | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126
466 ;; | [#x212A-#x212B] | #x212E | [#x2180-#x2182]
467 ;; | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C]
468 ;; | [#xAC00-#xD7A3]
470 ;; Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
472 ;; CombiningChar ::= [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486]
473 ;; | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD]
474 ;; | #x05BF | [#x05C1-#x05C2] | #x05C4
475 ;; | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC]
476 ;; | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8]
477 ;; | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C
478 ;; | [#x093E-#x094C] | #x094D | [#x0951-#x0954]
479 ;; | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC
480 ;; | #x09BE | #x09BF | [#x09C0-#x09C4]
481 ;; | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7
482 ;; | [#x09E2-#x09E3] | #x0A02 | #x0A3C
483 ;; | #x0A3E | #x0A3F | [#x0A40-#x0A42]
484 ;; | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71]
485 ;; | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5]
486 ;; | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03]
487 ;; | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48]
488 ;; | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83]
489 ;; | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD]
490 ;; | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44]
491 ;; | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56]
492 ;; | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8]
493 ;; | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03]
494 ;; | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D]
495 ;; | #x0D57 | #x0E31 | [#x0E34-#x0E3A]
496 ;; | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9]
497 ;; | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19]
498 ;; | #x0F35 | #x0F37 | #x0F39
499 ;; | #x0F3E | #x0F3F | [#x0F71-#x0F84]
500 ;; | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97
501 ;; | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9
502 ;; | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F]
503 ;; | #x3099 | #x309A
505 ;; Digit ::= [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9]
506 ;; | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F]
507 ;; | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF]
508 ;; | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F]
509 ;; | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]
511 ;; Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6
512 ;; | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]
515 ;; NOTES
516 ;; -----
518 ;; At the moment, only `<!ELEMENT' generates a syntactic chart. The
519 ;; `<!ATTLIST', `<!NOTATION' and `<!ENTITY' declarations are syntactically
520 ;; checked but they don't generate a syntactic chart.
522 ;; Besides the syntax above, ebnf-dtd also accepts a `pure' dtd file. An
523 ;; example of a `pure' dtd file is:
525 ;; <?xml version="1.0" encoding="UTF-8"?>
526 ;; <!--
527 ;; The main element.
528 ;; -->
529 ;; <!ELEMENT workflow (registers?, trigger-functions?, initial-actions,
530 ;; steps, splits?, joins?)>
531 ;; <!--
532 ;; An action that can be executed (id must be unique among actions for
533 ;; the enclosing step).
534 ;; Used in: actions
535 ;; -->
536 ;; <!ELEMENT action (restrict-to, validators?, pre-functions?, results,
537 ;; post-functions?)>
538 ;; <!ATTLIST action
539 ;; id CDATA #REQUIRED
540 ;; name CDATA #REQUIRED
541 ;; >
544 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
546 ;;; Code:
549 (require 'ebnf-otz)
(defvar ebnf-dtd-lex nil
  "Value associated with the token last returned by function `ebnf-dtd-lex'.
The parser reads it as the text of `name', `string' and `begin-pi'
tokens.")
556 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
557 ;; Syntactic analyzer
560 ;;; document ::= prolog element Misc*
561 ;;; /* Note that *only* the prolog will be parsed */
(defun ebnf-dtd-parser (start)
  "Parse the DTD text from buffer position START up to `ebnf-limit'.

Only the prolog is parsed.  Return the list of productions collected
from the declarations, most recent first.  Point is restored to where
it was on entry when parsing finishes without error.

Signal an error if the input is empty or if a DOCTYPE declaration is
not closed by `>'."
  (let* ((saved-point (point))
         (offset (1- start))
         (span (+ (- ebnf-limit start) 1))
         (productions nil)
         first prolog)
    (goto-char start)
    (setq first (ebnf-dtd-lex))
    (when (eq first 'end-of-input)
      (error "Empty DTD file"))
    (setq prolog (ebnf-dtd-prolog first))
    (when (not (eq (car prolog) 'end-prolog))
      ;; PROLOG is (CURRENT-TOKEN . TERMINATOR-TOKEN).
      (let ((current (car prolog))
            (terminator (cdr prolog)))
        (while (not (eq current terminator))
          (ebnf-message-float
           "Parsing...%s%%"
           (/ (* (- (point) offset) 100.0) span))
          (let* ((step (ebnf-dtd-intsubset current))
                 (rule (cdr step)))
            (setq current (car step))
            ;; Keep RULE only when it exists and
            ;; `ebnf-add-empty-rule-list' does not claim it.
            (cond ((null rule))
                  ((ebnf-add-empty-rule-list rule))
                  (t
                   (setq productions (cons rule productions))))))
        (unless (or (eq terminator 'end-of-input)
                    (eq (ebnf-dtd-lex) 'end-decl))
          (error "Missing end of DOCTYPE"))
        ;; adjust message, 'cause *only* prolog will be parsed
        (ebnf-message-float "Parsing...%s%%" 100.0)))
    (goto-char saved-point)
    productions))
596 ;;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
598 ;;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
600 ;;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
602 ;;; Eq ::= S? '=' S?
604 ;;; VersionNum ::= '1.0'
606 ;;; Misc ::= Comment | PI | S
608 ;;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
610 ;;; EncName ::= [A-Za-z] ([-A-Za-z0-9._])*
611 ;;; /* Encoding name contains only Latin characters */
613 ;;; SDDecl ::= S 'standalone' Eq
614 ;;; (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
616 ;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
617 ;;; ('[' intSubset ']' S?)? '>'
(defun ebnf-dtd-prolog (token)
  "Parse the DTD prolog whose first token is TOKEN.

An optional `<?xml ...?>' declaration is validated (version, then
optional encoding and standalone attributes), following processing
instructions are skipped, and the kind of input is determined.

Return a cons (NEXT . END):
- `<!DOCTYPE' with an internal subset: NEXT is the first token of the
  subset and END is `end-subset';
- `<!DOCTYPE' without an internal subset: NEXT is `end-prolog';
- a pure DTD file starting with a markup declaration: NEXT is that
  declaration token and END is `end-of-input';
- anything else: (end-prolog . end-subset).

Signal an error if the XML declaration is malformed or the document
type name is missing."
  (when (and (eq token 'begin-pi) (string= ebnf-dtd-lex "xml"))
    ;; version = "1.0"
    (setq token (ebnf-dtd-attribute (ebnf-dtd-lex) 'version-attr
                                    "\\`1\\.0\\'" "XML version"))
    ;; ( encoding = "encoding name" )?
    (setq token (ebnf-dtd-attribute-optional
                 token 'encoding-attr
                 "\\`[A-Za-z][-A-Za-z0-9._]*\\'" "XML encoding"))
    ;; ( standalone = ( "yes" | "no" ) )?
    ;; The alternation must be written `\\|'; a bare `|' is a literal
    ;; character in Emacs regexps and would reject both legal values.
    (setq token (ebnf-dtd-attribute-optional
                 token 'standalone-attr
                 "\\`\\(yes\\|no\\)\\'" "XML standalone"))
    (or (eq token 'end-pi)
        (error "Missing end of XML processing instruction"))
    ;; Step over `?>'.  This is done only when an XML declaration was
    ;; consumed; otherwise TOKEN is still the unprocessed first token of
    ;; the input (XMLDecl is optional) and must not be thrown away.
    (setq token (ebnf-dtd-lex)))
  ;; processing instructions
  (setq token (ebnf-dtd-pi token))
  (cond
   ;; DOCTYPE
   ((eq token 'doctype-decl)
    (or (eq (ebnf-dtd-lex) 'name)
        (error "Document type name is missing"))
    (cons (if (eq (ebnf-dtd-externalid) 'begin-subset)
              (ebnf-dtd-lex)
            'end-prolog)
          'end-subset))
   ;; pure DTD file: the declarations run up to the end of input
   ((memq token '(element-decl attlist-decl entity-decl notation-decl))
    (cons token 'end-of-input))
   (t
    '(end-prolog . end-subset))))
(defun ebnf-dtd-attribute (token attr match attr-name)
  "Parse a mandatory attribute of the XML declaration.

TOKEN must be the attribute token ATTR; otherwise signal an error that
mentions ATTR-NAME.  The value is then checked against regexp MATCH by
`ebnf-dtd-attribute-optional', whose result is returned."
  (unless (eq token attr)
    (error "%s attribute is missing" attr-name))
  (ebnf-dtd-attribute-optional token attr match attr-name))
(defun ebnf-dtd-attribute-optional (token attr match attr-name)
  "Parse an optional attribute of the XML declaration.

If TOKEN is the attribute token ATTR, the next two tokens must be
`equal' and `string', and the string text (left in variable
`ebnf-dtd-lex') must match regexp MATCH; the token following the value
is then returned.  If TOKEN is not ATTR, return TOKEN unchanged.

ATTR-NAME is the human-readable attribute name used in the error
message signaled when the attribute value is invalid."
  (when (eq token attr)
    (or (and (eq (ebnf-dtd-lex) 'equal)
             (eq (ebnf-dtd-lex) 'string)
             (string-match match ebnf-dtd-lex))
        ;; Callers already pass names such as "XML version", so the
        ;; message must not add another "XML" prefix.
        (error "%s attribute is invalid" attr-name))
    (setq token (ebnf-dtd-lex)))
  token)
669 ;;; ExternalID ::= 'SYSTEM' S SystemLiteral
670 ;;; | 'PUBLIC' S PubidLiteral S SystemLiteral
(defun ebnf-dtd-externalid (&optional token)
  "Parse an ExternalID (`SYSTEM' or `PUBLIC' form).

When TOKEN is nil the first token is read from the lexer and the
external id is optional: a token that is neither `system' nor `public'
is returned as is.  When TOKEN is given, the external id is mandatory
and such a token signals an error.

After a valid external id, return the value of
`ebnf-dtd-systemliteral'."
  (let* ((required token)
         (current (or token (ebnf-dtd-lex))))
    (if (memq current '(system public))
        (progn
          ;; PUBLIC carries a public identifier before the system one.
          (when (eq current 'public)
            (ebnf-dtd-pubidliteral))
          (ebnf-dtd-systemliteral))
      (when required
        (error "Missing `SYSTEM' or `PUBLIC' in external id"))
      current)))
687 ;;; SystemLiteral ::= ('"' [^"]* '"')
688 ;;; | ("'" [^']* "'")
(defun ebnf-dtd-systemliteral ()
  "Parse a SystemLiteral and return the token that follows it.
Signal an error if the next token is not a `string'."
  (unless (eq (ebnf-dtd-lex) 'string)
    (error "System identifier is invalid"))
  (ebnf-dtd-lex))
697 ;;; PubidLiteral ::= '"' PubidChar* '"'
698 ;;; | "'" (PubidChar - "'")* "'"
700 ;;; PubidChar ::= [-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9]
(defun ebnf-dtd-pubidliteral ()
  "Parse a PubidLiteral.
The next token must be a `string' whose text (in variable
`ebnf-dtd-lex') consists only of PubidChar characters; otherwise
signal an error."
  (or (and (eq (ebnf-dtd-lex) 'string)
           ;; Anchor with \\` and \\' instead of ^ and $: the class
           ;; contains newline, so line anchors would accept a literal
           ;; that has an illegal character on any other line.
           (string-match "\\`[-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9]*\\'"
                         ebnf-dtd-lex))
      (error "Public identifier is invalid")))
710 ;;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
712 ;;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
(defun ebnf-dtd-pi (token)
  "Skip consecutive processing instructions starting at TOKEN.

While TOKEN is `begin-pi', check that the instruction target (in
variable `ebnf-dtd-lex') is not `xml' in any letter case, discard
tokens up to `end-pi' and read the next token.  Return the first token
that is not `begin-pi'; a TOKEN that is not `begin-pi' is returned
unchanged.

Signal an error for an `xml' target or if the input ends before the
instruction is closed."
  (while (eq token 'begin-pi)
    (and (string-match "^[xX][mM][lL]$" ebnf-dtd-lex)
         (error "Processing instruction name can not be `XML'"))
    ;; Discard the instruction body up to the closing `?>'.
    (let (inner)
      (while (not (eq (setq inner (ebnf-dtd-lex)) 'end-pi))
        ;; Guard against looping forever on an unterminated instruction
        ;; (assumes the lexer keeps returning `end-of-input' once the
        ;; input is exhausted).
        (and (eq inner 'end-of-input)
             (error "Missing end of processing instruction"))))
    (setq token (ebnf-dtd-lex)))
  token)
724 ;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
725 ;;; ('[' intSubset ']' S?)? '>'
727 ;;; intSubset ::= (markupdecl | DeclSep)*
729 ;;; DeclSep ::= PEReference | S
731 ;;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl
732 ;;; | NotationDecl | PI | Comment
(defun ebnf-dtd-intsubset (token)
  "Parse one item of the internal subset starting at TOKEN.

Processing instructions are skipped first.  Return a cons
\(NEXT-TOKEN . RULE) where RULE is a production for `<!ELEMENT' and nil
for the end markers, parameter-entity references and the other
declarations, which are only checked.

Signal an error for any other token."
  ;; PI - Processing Instruction
  (when (eq token 'begin-pi)
    (setq token (ebnf-dtd-pi token)))
  (let ((handler (cdr (assq token
                            '((element-decl  . ebnf-dtd-elementdecl)  ; rule
                              (attlist-decl  . ebnf-dtd-attlistdecl)  ; annotation
                              (entity-decl   . ebnf-dtd-entitydecl)   ; annotation
                              (notation-decl . ebnf-dtd-notationdecl) ; annotation
                              )))))
    (cond
     ;; end markers are handed back untouched
     ((memq token '(end-subset end-of-input))
      (list token))
     ;; parameter-entity reference: just move to the next token
     ((eq token 'pe-ref)
      (list (ebnf-dtd-lex)))
     (handler
      (funcall handler))
     (t
      (error "Invalid DOCTYPE element")))))
757 ;;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
759 ;;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
761 ;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
762 ;;; | '(' S? '#PCDATA' S? ')'
764 ;;; children ::= (choice | seq) ('?' | '*' | '+')?
766 ;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
768 ;;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
770 ;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
(defun ebnf-dtd-elementdecl ()
  "Parse an `<!ELEMENT' declaration, after the keyword was read.

Return a cons (NEXT-TOKEN . PRODUCTION) where PRODUCTION is built by
`ebnf-make-production' from the element name, its content model and
the value `ebnf-action' had on entry.  `ebnf-action' is reset to nil.

Signal an error if the name or content is invalid or `>' is missing."
  (let ((saved-action ebnf-action)
        element-name kind content)
    (setq ebnf-action nil)
    (unless (eq (ebnf-dtd-lex) 'name)
      (error "Invalid ELEMENT name"))
    (setq element-name ebnf-dtd-lex
          kind (ebnf-dtd-lex))
    ;; CONTENT is (TOKEN-AFTER-CONTENT . NODE).
    (setq content
          (cond
           ;; parenthesized model: Mixed or children
           ((eq kind 'begin-group)
            (let ((first (ebnf-dtd-lex)))
              (if (eq first 'pcdata)
                  (ebnf-dtd-mixed)
                (ebnf-dtd-children first))))
           ;; EMPTY or ANY keyword becomes a terminal
           ((memq kind '(empty any))
            (let ((keyword (ebnf-make-terminal ebnf-dtd-lex)))
              (cons (ebnf-dtd-lex) keyword)))
           (t
            (error "Invalid ELEMENT content"))))
    (unless (eq (car content) 'end-decl)
      (error "Missing `>' in ELEMENT declaration"))
    (ebnf-eps-add-production element-name)
    ;; Read the following token before building the production, as the
    ;; returned pair lists it first.
    (let ((following (ebnf-dtd-lex)))
      (cons following
            (ebnf-make-production element-name (cdr content) saved-action)))))
799 ;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
800 ;;; | '(' S? '#PCDATA' S? ')'
(defun ebnf-dtd-mixed ()
  "Parse the rest of a Mixed content model, after `#PCDATA' was read.

Accepts `| Name' alternatives followed by `)*', or a plain `)' when
there are no alternatives; `(#PCDATA)*' is accepted too, as the Mixed
production allows zero alternatives before `)*'.

Return the value of `ebnf-token-alternative' applied to the collected
terminals and the token that follows the model; the caller reads it as
a cons (NEXT-TOKEN . NODE).

Signal an error on an invalid name, a missing `)' or, when there are
alternatives, a missing `*'."
  (let* ((alt (list (ebnf-make-terminal ebnf-dtd-lex)))
         (token (ebnf-dtd-lex))
         (has-alternative (eq token 'alternative)))
    (while (eq token 'alternative)
      (or (eq (ebnf-dtd-lex) 'name)
          (error "Invalid name"))
      ;; Every alternative must be a terminal node, like the `#PCDATA'
      ;; entry above and the names handled by `ebnf-dtd-cp', not a raw
      ;; string.
      (setq alt (cons (ebnf-make-terminal ebnf-dtd-lex) alt)
            token (ebnf-dtd-lex)))
    (or (eq token 'end-group)
        (error "Missing `)'"))
    (setq token (ebnf-dtd-lex))
    (cond
     ;; `)*': step over the star
     ((eq token 'zero-or-more)
      (setq token (ebnf-dtd-lex)))
     ;; the star is mandatory once alternatives were given
     (has-alternative
      (error "Missing `*'")))
    (ebnf-token-alternative alt (list token))))
820 ;;; children ::= (choice | seq) ('?' | '*' | '+')?
(defun ebnf-dtd-children (token)
  "Parse a children content model whose first inner token is TOKEN.
The group is parsed by `ebnf-dtd-choice-seq' and an optional trailing
`?', `*' or `+' is applied by `ebnf-dtd-operators', whose value is
returned."
  (let ((group (ebnf-dtd-choice-seq token)))
    (ebnf-dtd-operators group)))
827 ;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
829 ;;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
(defun ebnf-dtd-choice-seq (token)
  "Parse the inside of a choice or seq group starting at TOKEN.

The group is a list of content particles separated by `|' (choice) or
by `,' (seq), or a single particle.  Return the node for the group.

Signal an error if the group is not closed by `)'."
  (setq token (ebnf-dtd-cp token))
  ;; TOKEN is now (SEPARATOR-OR-END . NODE) for the particle just read.
  (let (elist)
    (cond
     ;; choice
     ((eq (car token) 'alternative)
      (while (eq (car token) 'alternative)
        (setq elist (cons (cdr token) elist)
              token (ebnf-dtd-cp (ebnf-dtd-lex))))
      ;; `ebnf-token-alternative' hands back a (TOKEN . NODE) pair (see
      ;; how `ebnf-dtd-elementdecl' reads the value of
      ;; `ebnf-dtd-mixed'); only the node belongs in the result, as in
      ;; the two branches below.
      (setq elist (cdr (ebnf-token-alternative elist token))))
     ;; seq
     ((eq (car token) 'comma)
      (while (eq (car token) 'comma)
        (setq elist (cons (cdr token) elist)
              token (ebnf-dtd-cp (ebnf-dtd-lex))))
      (setq elist (ebnf-token-sequence (cons (cdr token) elist))))
     ;; only one element
     (t
      (setq elist (cdr token))))
    (or (eq (car token) 'end-group)
        (error "Missing `)' in ELEMENT content"))
    elist))
856 ;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
(defun ebnf-dtd-cp (token)
  "Parse one content particle starting at TOKEN.

A `name' becomes a terminal made from the text in variable
`ebnf-dtd-lex'; `begin-group' opens a nested choice or seq.  An
optional trailing `?', `*' or `+' is then applied by
`ebnf-dtd-operators', whose value is returned.

Signal an error for any other token."
  (let ((particle
         (cond ((eq token 'begin-group)
                (ebnf-dtd-choice-seq (ebnf-dtd-lex)))
               ((eq token 'name)
                (ebnf-make-terminal ebnf-dtd-lex))
               (t
                (error "Invalid element")))))
    (ebnf-dtd-operators particle)))
869 ;;; elm ('?' | '*' | '+')?
(defun ebnf-dtd-operators (elm)
  "Apply an optional occurrence operator to node ELM.

Read the next token.  If it is `optional' (?), `zero-or-more' (*) or
`one-or-more' (+), read one more token and return it paired with ELM
wrapped accordingly.  Otherwise return (TOKEN . ELM) with the token
just read."
  (let ((token (ebnf-dtd-lex)))
    (if (not (memq token '(optional zero-or-more one-or-more)))
        ;; only element
        (cons token elm)
      ;; Read the token after the operator first, then wrap ELM.
      (let ((following (ebnf-dtd-lex)))
        (cons following
              (cond ((eq token 'optional)       ; ? - optional
                     (ebnf-token-optional elm))
                    ((eq token 'zero-or-more)   ; * - zero or more
                     (ebnf-make-zero-or-more elm))
                    (t                          ; + - one or more
                     (ebnf-make-one-or-more elm))))))))
885 ;;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
887 ;;; AttDef ::= S Name S AttType S DefaultDecl
889 ;;; AttType ::= StringType | TokenizedType | EnumeratedType
891 ;;; StringType ::= 'CDATA'
893 ;;; TokenizedType ::= 'ID'
894 ;;; | 'IDREF'
895 ;;; | 'IDREFS'
896 ;;; | 'ENTITY'
897 ;;; | 'ENTITIES'
898 ;;; | 'NMTOKEN'
899 ;;; | 'NMTOKENS'
901 ;;; EnumeratedType ::= NotationType | Enumeration
903 ;;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
905 ;;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
907 ;;; DefaultDecl ::= '#REQUIRED'
908 ;;; | '#IMPLIED'
909 ;;; | (('#FIXED' S)? AttValue)
912 ;;; AttValue ::= '"' ([^<&"] | Reference)* '"'
913 ;;; | "'" ([^<&'] | Reference)* "'"
915 ;;; Reference ::= EntityRef | CharRef
917 ;;; EntityRef ::= '&' Name ';'
919 ;;; CharRef ::= '&#' [0-9]+ ';'
920 ;;; | '&#x' [0-9a-fA-F]+ ';'
922 ;;; "^\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*$"
(defun ebnf-dtd-attlistdecl ()
  "Check an `<!ATTLIST' declaration, after the keyword was read.

Each attribute definition is a name, a type (a keyword type, a
NOTATION name list or an enumeration) and a default declaration
\(`#REQUIRED', `#IMPLIED' or an optionally `#FIXED' string value).

The declaration is only checked.  Return (NEXT-TOKEN), i.e. a cons of
the token that follows `>' and nil.

Signal an error on any syntax violation."
  (or (eq (ebnf-dtd-lex) 'name)
      (error "Invalid ATTLIST name"))
  (let (token)
    (while (eq (setq token (ebnf-dtd-lex)) 'name)
      ;; type
      (setq token (ebnf-dtd-lex))
      (cond
       ((eq token 'notation)
        (or (eq (ebnf-dtd-lex) 'begin-group)
            (error "Missing `(' in NOTATION type in ATTLIST declaration"))
        (ebnf-dtd-namelist "NOTATION" '(name)))
       ((eq token 'begin-group)
        (ebnf-dtd-namelist "enumeration" '(name name-char)))
       ((memq token
              '(cdata id idref idrefs entity entities nmtoken nmtokens)))
       (t
        (error "Invalid type in ATTLIST declaration")))
      ;; default value
      (setq token (ebnf-dtd-lex))
      (unless (memq token '(required implied))
        (and (eq token 'fixed)
             (setq token (ebnf-dtd-lex)))
        (or (and (eq token 'string)
                 ;; Anchored with \\` and \\' so that the whole value is
                 ;; checked; ^ and $ also match at line breaks and let
                 ;; a multi-line value hide `<' or a bare `&'.
                 (string-match
                  "\\`\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*\\'"
                  ebnf-dtd-lex))
            (error "Invalid default value in ATTLIST declaration"))))
    (or (eq token 'end-decl)
        (error "Missing `>' in end of ATTLIST"))
    (cons (ebnf-dtd-lex) nil)))
(defun ebnf-dtd-namelist (type name-list)
  "Check a `|'-separated list of names closed by `)'.

Each item must be a token that is a member of NAME-LIST.  TYPE is a
string naming the construct, used in error messages.  Return non-nil
on success; signal an error on an invalid item or a missing `)'."
  (let ((separator 'alternative))
    ;; The list has at least one item; continue after each `|'.
    (while (eq separator 'alternative)
      (or (memq (ebnf-dtd-lex) name-list)
          (error "Invalid name in %s type in ATTLIST declaration" type))
      (setq separator (ebnf-dtd-lex)))
    (or (eq separator 'end-group)
        (error "Missing `)' in %s type in ATTLIST declaration" type))))
968 ;;; EntityDecl ::= GEDecl | PEDecl
970 ;;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
972 ;;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
974 ;;; EntityDef ::= EntityValue | (ExternalID NDataDecl?)
976 ;;; PEDef ::= EntityValue | ExternalID
978 ;;; NDataDecl ::= S 'NDATA' S Name
981 ;;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
982 ;;; | "'" ([^%&'] | PEReference | Reference)* "'"
984 ;;; PEReference ::= '%' Name ';'
986 ;;; Reference ::= EntityRef | CharRef
988 ;;; EntityRef ::= '&' Name ';'
990 ;;; CharRef ::= '&#' [0-9]+ ';'
991 ;;; | '&#x' [0-9a-fA-F]+ ';'
993 ;;; "^\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*$"
(defun ebnf-dtd-entitydecl ()
  "Check an `<!ENTITY' declaration, after the keyword was read.

Handles both general entities and parameter entities (introduced by
`%').  The definition is either a string entity value or an external
id; for general entities the external id may be followed by an NDATA
name.

The declaration is only checked.  Return (NEXT-TOKEN), i.e. a cons of
the token that follows `>' and nil.

Signal an error on any syntax violation."
  (let* ((token (ebnf-dtd-lex))
         (pedecl (eq token 'percent)))
    (and pedecl
         (setq token (ebnf-dtd-lex)))
    (or (eq token 'name)
        (error "Invalid name of ENTITY"))
    (setq token (ebnf-dtd-lex))
    (if (eq token 'string)
        ;; Anchored with \\` and \\' so that the whole value is checked;
        ;; ^ and $ also match at line breaks and let a multi-line value
        ;; hide a bare `%' or `&'.
        (if (string-match
             "\\`\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*\\'"
             ebnf-dtd-lex)
            (setq token (ebnf-dtd-lex))
          (error "Invalid ENTITY definition"))
      (setq token (ebnf-dtd-externalid token))
      ;; NDATA is accepted for general entities only.
      (when (and (not pedecl) (eq token 'ndata))
        (or (eq (ebnf-dtd-lex) 'name)
            (error "Invalid NDATA name"))
        (setq token (ebnf-dtd-lex))))
    (or (eq token 'end-decl)
        (error "Missing `>' in end of ENTITY"))
    (cons (ebnf-dtd-lex) nil)))
1020 ;;; NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
1022 ;;; PublicID ::= 'PUBLIC' S PubidLiteral
(defun ebnf-dtd-notationdecl ()
  "Check a `<!NOTATION' declaration, after the keyword was read.

The declaration is a name followed by an external id or a public id
and `>'.  It is only checked.  Return (NEXT-TOKEN), i.e. a cons of the
token that follows `>' and nil.

Signal an error if the name is invalid or `>' is missing."
  (unless (eq (ebnf-dtd-lex) 'name)
    (error "Invalid name NOTATION"))
  (unless (eq (ebnf-dtd-externalid-or-publicid) 'end-decl)
    (error "Missing `>' in end of NOTATION"))
  (list (ebnf-dtd-lex)))
1033 ;;; ExternalID ::= 'SYSTEM' S SystemLiteral
1034 ;;; | 'PUBLIC' S PubidLiteral S SystemLiteral
1036 ;;; PublicID ::= 'PUBLIC' S PubidLiteral
(defun ebnf-dtd-externalid-or-publicid ()
  "Parse an ExternalID or a PublicID and return the token after it.

After `SYSTEM', the value of `ebnf-dtd-systemliteral' is returned.
After `PUBLIC', a public identifier is required and a following
`string' token (the system literal) is optional.

Signal an error if neither keyword is found."
  (let ((keyword (ebnf-dtd-lex)))
    (cond ((eq keyword 'public)
           (ebnf-dtd-pubidliteral)
           (let ((following (ebnf-dtd-lex)))
             ;; A string here is the optional system literal; step
             ;; over it.
             (if (eq following 'string)
                 (ebnf-dtd-lex)
               following)))
          ((eq keyword 'system)
           (ebnf-dtd-systemliteral))
          (t
           (error "Missing `SYSTEM' or `PUBLIC'")))))
1052 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1053 ;; Lexical analyzer
(defconst ebnf-dtd-token-table (make-vector 256 'error)
  "Vector of 256 entries mapping a character code to a lexical token.
Every entry starts as `error'; `ebnf-dtd-initialize' fills in the
other token classes.")
(defun ebnf-dtd-initialize ()
  "Initialize EBNF token table.
Fill `ebnf-dtd-token-table' with the token class of each character.
Entries not set here keep their current value."
  ;; control character & control 8-bit character are set to `error'
  ;; Character ranges, both ends inclusive:
  (dolist (range '((?0 ?9 name-char)    ; digits: 0-9
                   (?A ?Z name)         ; printable character: A-Z
                   (?a ?z name)         ; printable character: a-z
                   (?\240 ?\377 name))) ; European 8-bit accentuated characters
    (let ((char (car range))
          (last (nth 1 range))
          (class (nth 2 range)))
      (while (<= char last)
        (aset ebnf-dtd-token-table char class)
        (setq char (1+ char)))))
  ;; Single characters:
  (dolist (entry '(;; Override name characters:
                   (?_ name)
                   (?: name)
                   (?. name-char)
                   (?- name-char)
                   ;; Override space characters:
                   (?\n space)          ; [NL] linefeed
                   (?\r space)          ; [CR] carriage return
                   (?\t space)          ; [HT] horizontal tab
                   (?\s space)          ; [SP] space
                   ;; Override other lexical characters:
                   (?= equal)
                   (?, comma)
                   (?* zero-or-more)
                   (?+ one-or-more)
                   (?| alternative)
                   (?% percent)
                   (?& ampersand)
                   (?# hash)
                   (?\? interrogation)
                   (?\" double-quote)
                   (?\' single-quote)
                   (?< less-than)
                   (?> end-decl)
                   (?\( begin-group)
                   (?\) end-group)
                   (?\[ begin-subset)))
    (aset ebnf-dtd-token-table (car entry) (nth 1 entry)))
  ;; Kept as the last form so the function's value stays the same.
  (aset ebnf-dtd-token-table ?\] 'end-subset))
;; `ebnf-range-regexp' is used in place of a literal "\240-\377" range
;; (see `ebnf-range-regexp').
(defconst ebnf-dtd-name-chars
  (ebnf-range-regexp "-._:0-9A-Za-z" ?\240 ?\377)
  "Character set accepted inside a DTD name.

Built from the ASCII name characters plus the range \\240-\\377 supplied
through `ebnf-range-regexp'.  It is handed to `ebnf-buffer-substring' by
`ebnf-dtd-lex' and `ebnf-dtd-name-ref'.")
(defconst ebnf-dtd-decl-alist
  '(("ATTLIST"  . attlist-decl)
    ("DOCTYPE"  . doctype-decl)
    ("ELEMENT"  . element-decl)
    ("ENTITY"   . entity-decl)
    ("NOTATION" . notation-decl))
  "Alist mapping a declaration keyword to its token symbol.

`ebnf-dtd-lex' looks up the name that follows `<!' in this alist and
signals an error when the name is not listed.")
(defconst ebnf-dtd-element-alist
  '(("#FIXED"    . fixed)
    ("#IMPLIED"  . implied)
    ("#PCDATA"   . pcdata)
    ("#REQUIRED" . required))
  "Alist mapping a `#'-prefixed keyword to its token symbol.

`ebnf-dtd-lex' looks up the text that starts with `#' in this alist and
signals an error when it is not listed.")
(defconst ebnf-dtd-name-alist
  '(;; upper-case DTD keywords
    ("ANY"        . any)
    ("CDATA"      . cdata)
    ("EMPTY"      . empty)
    ("ENTITIES"   . entities)
    ("ENTITY"     . entity)
    ("ID"         . id)
    ("IDREF"      . idref)
    ("IDREFS"     . idrefs)
    ("NDATA"      . ndata)
    ("NMTOKEN"    . nmtoken)
    ("NMTOKENS"   . nmtokens)
    ("NOTATION"   . notation)
    ("PUBLIC"     . public)
    ("SYSTEM"     . system)
    ;; lower-case attribute names
    ("encoding"   . encoding-attr)
    ("standalone" . standalone-attr)
    ("version"    . version-attr))
  "Alist mapping a reserved name to its token symbol.

`ebnf-dtd-lex' looks up every name it scans in this alist; a name that
is not listed keeps its generic token class.")
(defun ebnf-dtd-lex ()
  "Lexical analyzer for DTD.

Return a lexical token.

Spaces and `<!-- ... -->' comments are skipped first.  The token is
`end-of-input' when point reaches `ebnf-limit'.  For tokens that carry
text (names, strings, references, processing instruction targets) the
text is stored in the variable `ebnf-dtd-lex'.

Signal an error on a character outside the token table, on an unknown
declaration after `<!', or on an unknown `#' keyword.

See documentation for variable `ebnf-dtd-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; skip spaces and comments
      (while (if (> (following-char) 255)
                 ;; The token table only covers codes 0..255.
                 (progn
                   (setq token 'error)
                   nil)
               (setq token (aref ebnf-dtd-token-table (following-char)))
               (cond
                ((eq token 'space)
                 (skip-chars-forward " \n\r\t" ebnf-limit)
                 (< (point) ebnf-limit))
                ((and (eq token 'less-than)
                      (looking-at "<!--"))
                 ;; Keep looping while the comment skipper returns non-nil.
                 (ebnf-dtd-skip-comment))
                (t nil))))
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Invalid character"))
       ;; beginning of declaration:
       ;; <?name, <!ATTLIST, <!DOCTYPE, <!ELEMENT, <!ENTITY, <!NOTATION
       ((eq token 'less-than)
        (forward-char)
        (let ((char (following-char)))
          (cond ((= char ?\?)           ; <?
                 (forward-char)
                 (setq ebnf-dtd-lex (ebnf-buffer-substring ebnf-dtd-name-chars))
                 'begin-pi)
                ((= char ?!)            ; <!
                 (forward-char)
                 (let ((decl (ebnf-buffer-substring ebnf-dtd-name-chars)))
                   (or (cdr (assoc decl ebnf-dtd-decl-alist))
                       (error "Invalid declaration name `%s'" decl))))
                (t                      ; <x
                 (error "Invalid declaration `<%c'" char)))))
       ;; name, namechar
       ((memq token '(name name-char))
        (setq ebnf-dtd-lex (ebnf-buffer-substring ebnf-dtd-name-chars))
        ;; Reserved names get their own token; others keep the class.
        (or (cdr (assoc ebnf-dtd-lex ebnf-dtd-name-alist))
            token))
       ;; ?, ?>
       ((eq token 'interrogation)
        (forward-char)
        (if (/= (following-char) ?>)
            'optional
          (forward-char)
          'end-pi))
       ;; #FIXED, #IMPLIED, #PCDATA, #REQUIRED
       ((eq token 'hash)
        (forward-char)
        (setq ebnf-dtd-lex
              (concat "#" (ebnf-buffer-substring ebnf-dtd-name-chars)))
        (or (cdr (assoc ebnf-dtd-lex ebnf-dtd-element-alist))
            (error "Invalid element `%s'" ebnf-dtd-lex)))
       ;; "string"
       ((eq token 'double-quote)
        (setq ebnf-dtd-lex (ebnf-dtd-string ?\"))
        'string)
       ;; 'string'
       ((eq token 'single-quote)
        (setq ebnf-dtd-lex (ebnf-dtd-string ?\'))
        'string)
       ;; %, %name;
       ((eq token 'percent)
        (forward-char)
        (if (looking-at "[ \n\r\t]")
            'percent
          (setq ebnf-dtd-lex (ebnf-dtd-name-ref "%"))
          'pe-ref))
       ;; &#...;, &#x...;, &name;
       ((eq token 'ampersand)
        (forward-char)
        (if (/= (following-char) ?#)
            (progn
              ;; &name;
              (setq ebnf-dtd-lex (ebnf-dtd-name-ref "&"))
              'entity-ref)
          ;; &#...;, &#x...;
          (forward-char)
          (setq ebnf-dtd-lex (if (/= (following-char) ?x)
                                 (ebnf-dtd-char-ref "&#" "0-9")
                               (forward-char)
                               (ebnf-dtd-char-ref "&#x" "0-9a-fA-F")))
          'char-ref))
       ;; miscellaneous: (, ), [, ], =, |, *, +, >, `,'
       (t
        (forward-char)
        token)))))
(defun ebnf-dtd-name-ref (start)
  "Scan a named reference at point and return its text.

START is the prefix already consumed by the caller (`ebnf-dtd-lex'
passes \"%\" or \"&\").  The work is delegated to `ebnf-dtd-char-ref'
with `ebnf-dtd-name-chars' as the allowed character set."
  (let ((allowed ebnf-dtd-name-chars))
    (ebnf-dtd-char-ref start allowed)))
(defun ebnf-dtd-char-ref (start chars)
  "Scan the body of a reference at point and return the whole reference.

START is the prefix already consumed by the caller.  CHARS is the
character set passed to `ebnf-buffer-substring' to read the body.
The body must be followed by `;', which is consumed.

Return START, the body and \";\" joined in one string.
Signal an error if the character after the body is not `;'."
  (let* ((body (ebnf-buffer-substring chars))
         ;; Read after the body has been scanned, so this is the
         ;; character that should terminate the reference.
         (terminator (following-char)))
    (unless (= terminator ?\;)
      (error "Invalid element `%s%s%c'" start body terminator))
    (forward-char)
    (format "%s%s;" start body)))
;; `ebnf-range-regexp' is used in place of a literal "\240-\377" range
;; (see `ebnf-range-regexp').
(defconst ebnf-dtd-double-string-chars
  (ebnf-range-regexp "\t -!#-~" ?\240 ?\377)
  "Character set skipped by `ebnf-dtd-string' inside a \"...\" string.

The ASCII part is tab and the printable characters except the double
quote; the range \\240-\\377 is supplied through `ebnf-range-regexp'.")

(defconst ebnf-dtd-single-string-chars
  (ebnf-range-regexp "\t -&(-~" ?\240 ?\377)
  "Character set skipped by `ebnf-dtd-string' inside a '...' string.

The ASCII part is tab and the printable characters except the single
quote; the range \\240-\\377 is supplied through `ebnf-range-regexp'.")
(defun ebnf-dtd-string (delim)
  "Read the quoted string starting at point and return its contents.

DELIM is the quote character, either ?\\\" or ?\\='.  Point must be on
the opening delimiter; on return it is just after the closing one.
The scan stops at `ebnf-limit' or at the first character that is not
in the character set for DELIM.

Return the text between the delimiters, without text properties.
Signal an error if the scan does not stop on DELIM."
  (forward-char)                        ; step over the opening delimiter
  (let ((beg (point))
        (allowed (if (= delim ?\")
                     ebnf-dtd-double-string-chars
                   ebnf-dtd-single-string-chars)))
    (skip-chars-forward allowed ebnf-limit)
    (unless (= (following-char) delim)
      (error "Missing string delimiter `%c'" delim))
    (let ((end (point)))
      (forward-char)                    ; step over the closing delimiter
      (buffer-substring-no-properties beg end))))
;; `ebnf-range-regexp' is used in place of a literal "\177-\237" range
;; (see `ebnf-range-regexp').
(defconst ebnf-dtd-comment-chars
  (ebnf-range-regexp "^-\000-\010\013\014\016-\037" ?\177 ?\237)
  "Negated character set skipped by `ebnf-dtd-skip-comment'.

The listed characters are the ones that stop the skip: `-', the control
characters other than tab, linefeed and carriage return, and the range
\\177-\\237 supplied through `ebnf-range-regexp'.")

(defconst ebnf-dtd-filename-chars
  (ebnf-range-regexp "^-\000-\037" ?\177 ?\237)
  "Negated character set read by `ebnf-dtd-eps-filename'.

The listed characters are the ones that stop the scan: `-', every
control character \\000-\\037, and the range \\177-\\237 supplied through
`ebnf-range-regexp'.")
(defun ebnf-dtd-skip-comment ()
  "Skip the `<!-- ... -->' comment starting at point.

Point must be on the `<' of `<!--'.  The first character after `<!--'
selects an action:

- while `ebnf-eps-executing' is non-nil, `[', `]', `H' and `F' open an
  EPS context, close one, set the EPS header and set the EPS footer,
  each with the text read by `ebnf-dtd-eps-filename';
- otherwise `ebnf-action' is set from `ebnf-comment-table'.

Return t when the closing `-->' was found and skipped, or nil when
`ebnf-limit' was reached first.  Signal an error when the comment
contains a character excluded by `ebnf-dtd-comment-chars' other
than `-'."
  (forward-char 4)                      ; <!--
  (cond
   ;; open EPS file
   ((and ebnf-eps-executing (= (following-char) ?\[))
    (ebnf-eps-add-context (ebnf-dtd-eps-filename)))
   ;; close EPS file
   ((and ebnf-eps-executing (= (following-char) ?\]))
    (ebnf-eps-remove-context (ebnf-dtd-eps-filename)))
   ;; EPS header
   ((and ebnf-eps-executing (= (following-char) ?H))
    (ebnf-eps-header-comment (ebnf-dtd-eps-filename)))
   ;; EPS footer
   ((and ebnf-eps-executing (= (following-char) ?F))
    (ebnf-eps-footer-comment (ebnf-dtd-eps-filename)))
   ;; any other action in comment
   (t
    (setq ebnf-action (aref ebnf-comment-table (following-char)))))
  ;; Skip the comment text.  The loop continues only over runs of `-'
  ;; that do not start `-->'.  Stopping on any other character (an
  ;; excluded control character) must leave the loop, otherwise
  ;; skipping "-" makes no progress and the loop never ends.
  (while (progn
           (skip-chars-forward ebnf-dtd-comment-chars ebnf-limit)
           (and (< (point) ebnf-limit)
                (= (following-char) ?-)
                (not (looking-at "-->"))))
    (skip-chars-forward "-" ebnf-limit))
  ;; check for a valid end of comment
  (cond ((>= (point) ebnf-limit)
         nil)
        ((looking-at "-->")
         (forward-char 3)
         t)
        (t
         (error "Invalid character"))))
(defun ebnf-dtd-eps-filename ()
  "Read the text that follows an EPS marker inside a comment.

Point must be on the marker character, which is skipped.  The text is
read with `ebnf-dtd-filename-chars'; a run of `-' is kept as part of
the text unless it starts the closing `-->'.  Reading stops at
`ebnf-limit', at `-->', or at any other character excluded by
`ebnf-dtd-filename-chars'.

Return the collected text as a string."
  (forward-char)
  (let ((fname (ebnf-buffer-substring ebnf-dtd-filename-chars)))
    ;; The scan may also have stopped on \n, \t or \r; only a `-' that
    ;; does not begin `-->' keeps the text going.
    (while (and (< (point) ebnf-limit)
                (= (following-char) ?-)
                (not (looking-at "-->")))
      ;; Arguments are evaluated left to right: first the dashes, then
      ;; the next stretch of ordinary characters.
      (setq fname (concat fname
                          (ebnf-buffer-substring "-")
                          (ebnf-buffer-substring ebnf-dtd-filename-chars))))
    fname))
1348 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1351 (provide 'ebnf-dtd)
1353 ;; arch-tag: c21bb640-135f-4afa-8712-fa11d86301c4
1354 ;;; ebnf-dtd.el ends here