* xml.el: Protect parser against XML bombs.
[emacs.git] / lib-src / make-docfile.c
blob8156db9b73ad4c507f7dd75a02fda8be2c8855b2
1 /* Generate doc-string file for GNU Emacs from source files.
2 Copyright (C) 1985-1986, 1992-1994, 1997, 1999-2012
3 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
/* The arguments given to this program are all the C and Lisp source files
   of GNU Emacs.  .elc and .el and .c files are allowed.
   A .o file can also be specified; the .c file it was made from is used.
   This helps the makefile pass the correct list of files.
   Option -d DIR means change to DIR before looking for files.

   The results, which go to standard output or to a file
   specified with -a or -o (-a to append, -o to start from nothing),
   are entries containing function or variable names and their documentation.
   Each entry starts with a ^_ character.
   Then comes F for a function or V for a variable.
   Then comes the function or variable name, terminated with a newline.
   Then comes the documentation for that function or variable.  */
36 #include <config.h>
38 /* Defined to be emacs_main, sys_fopen, etc. in config.h. */
39 #undef main
40 #undef fopen
41 #undef chdir
43 #include <stdio.h>
44 #include <stdlib.h>
45 #ifdef MSDOS
46 #include <fcntl.h>
47 #endif /* MSDOS */
48 #ifdef WINDOWSNT
49 #include <fcntl.h>
50 #include <direct.h>
51 #endif /* WINDOWSNT */
53 #ifdef DOS_NT
54 #define READ_TEXT "rt"
55 #define READ_BINARY "rb"
56 #else /* not DOS_NT */
57 #define READ_TEXT "r"
58 #define READ_BINARY "r"
59 #endif /* not DOS_NT */
61 #ifndef DIRECTORY_SEP
62 #define DIRECTORY_SEP '/'
63 #endif
65 #ifndef IS_DIRECTORY_SEP
66 #define IS_DIRECTORY_SEP(_c_) ((_c_) == DIRECTORY_SEP)
67 #endif
69 /* Use this to suppress gcc's `...may be used before initialized' warnings. */
70 #ifdef lint
71 # define IF_LINT(Code) Code
72 #else
73 # define IF_LINT(Code) /* empty */
74 #endif
76 static int scan_file (char *filename);
77 static int scan_lisp_file (const char *filename, const char *mode);
78 static int scan_c_file (char *filename, const char *mode);
79 static void start_globals (void);
80 static void write_globals (void);
82 #ifdef MSDOS
83 /* s/msdos.h defines this as sys_chdir, but we're not linking with the
84 file where that function is defined. */
85 #undef chdir
86 #endif
88 #include <unistd.h>
/* Output stream that receives everything this program generates.  */
FILE *outfile;

/* The name under which this program was started; it prefixes the
   diagnostics printed by `error'.  */
char *progname;

/* Set to nonzero when the `-g' option is given.  */
int generate_globals;
99 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
101 /* VARARGS1 */
102 static void
103 error (const char *s1, const char *s2)
105 fprintf (stderr, "%s: ", progname);
106 fprintf (stderr, s1, s2);
107 fprintf (stderr, "\n");
/* Report a problem exactly as `error' does (S1 is the format, S2 its
   argument) and then terminate the program with EXIT_FAILURE.  This
   function does not return.  */

static _Noreturn void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Allocate SIZE bytes like malloc, but never return NULL: when the
   allocation fails, report it and exit through `fatal'.

   SIZE is a size_t so that callers passing size_t lengths are not
   silently truncated.  A zero-byte request is rounded up to one byte,
   because malloc (0) is allowed to return NULL and that must not be
   mistaken for memory exhaustion.  */

static void *
xmalloc (size_t size)
{
  void *result = malloc (size != 0 ? size : 1);
  if (result == NULL)
    fatal ("virtual memory exhausted", 0);
  return result;
}
/* Resize the block ARG to SIZE bytes like realloc, but never return
   NULL: when the reallocation fails, report it and exit through
   `fatal'.

   SIZE is a size_t so that large requests are not truncated.  A
   zero-byte request is rounded up to one byte, because realloc with a
   zero size may free the block and return NULL, which must not be
   mistaken for memory exhaustion.  */

static void *
xrealloc (void *arg, size_t size)
{
  void *result = realloc (arg, size != 0 ? size : 1);
  if (result == NULL)
    fatal ("virtual memory exhausted", 0);
  return result;
}
144 main (int argc, char **argv)
146 int i;
147 int err_count = 0;
148 int first_infile;
150 progname = argv[0];
152 outfile = stdout;
154 /* Don't put CRs in the DOC file. */
155 #ifdef MSDOS
156 _fmode = O_BINARY;
157 #if 0 /* Suspicion is that this causes hanging.
158 So instead we require people to use -o on MSDOS. */
159 (stdout)->_flag &= ~_IOTEXT;
160 _setmode (fileno (stdout), O_BINARY);
161 #endif
162 outfile = 0;
163 #endif /* MSDOS */
164 #ifdef WINDOWSNT
165 _fmode = O_BINARY;
166 _setmode (fileno (stdout), O_BINARY);
167 #endif /* WINDOWSNT */
169 /* If first two args are -o FILE, output to FILE. */
170 i = 1;
171 if (argc > i + 1 && !strcmp (argv[i], "-o"))
173 outfile = fopen (argv[i + 1], "w");
174 i += 2;
176 if (argc > i + 1 && !strcmp (argv[i], "-a"))
178 outfile = fopen (argv[i + 1], "a");
179 i += 2;
181 if (argc > i + 1 && !strcmp (argv[i], "-d"))
183 if (chdir (argv[i + 1]) != 0)
185 perror (argv[i + 1]);
186 return EXIT_FAILURE;
188 i += 2;
190 if (argc > i && !strcmp (argv[i], "-g"))
192 generate_globals = 1;
193 ++i;
196 if (outfile == 0)
197 fatal ("No output file specified", "");
199 if (generate_globals)
200 start_globals ();
202 first_infile = i;
203 for (; i < argc; i++)
205 int j;
206 /* Don't process one file twice. */
207 for (j = first_infile; j < i; j++)
208 if (! strcmp (argv[i], argv[j]))
209 break;
210 if (j == i)
211 err_count += scan_file (argv[i]);
214 if (err_count == 0 && generate_globals)
215 write_globals ();
217 return (err_count > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
220 /* Add a source file name boundary marker in the output file. */
221 static void
222 put_filename (char *filename)
224 char *tmp;
226 for (tmp = filename; *tmp; tmp++)
228 if (IS_DIRECTORY_SEP (*tmp))
229 filename = tmp + 1;
232 putc (037, outfile);
233 putc ('S', outfile);
234 fprintf (outfile, "%s\n", filename);
237 /* Read file FILENAME and output its doc strings to outfile. */
238 /* Return 1 if file is not found, 0 if it is found. */
240 static int
241 scan_file (char *filename)
244 size_t len = strlen (filename);
246 if (!generate_globals)
247 put_filename (filename);
248 if (len > 4 && !strcmp (filename + len - 4, ".elc"))
249 return scan_lisp_file (filename, READ_BINARY);
250 else if (len > 3 && !strcmp (filename + len - 3, ".el"))
251 return scan_lisp_file (filename, READ_TEXT);
252 else
253 return scan_c_file (filename, READ_TEXT);
256 static void
257 start_globals (void)
259 fprintf (outfile, "/* This file was auto-generated by make-docfile. */\n");
260 fprintf (outfile, "/* DO NOT EDIT. */\n");
261 fprintf (outfile, "struct emacs_globals {\n");
/* Fixed-size scratch buffer.  `read_c_string_or_comment' stores the
   text it reads here when asked to buffer it, and `scan_c_file' uses it
   for the names it collects.  */
static char input_buffer[128];
/* Bookkeeping shared by `read_c_string_or_comment' and its helpers.  */
struct rcsoc_state
{
  /* How many spaces and how many newlines have been read from the
     input but not yet written out.  */
  unsigned pending_spaces, pending_newlines;

  /* The stream being read.  */
  FILE *in_file;

  /* When non-null, the next free position of a buffer that receives
     the copied characters.  */
  char *buf_ptr;
  /* When non-null, a stream that receives the copied characters.  */
  FILE *out_file;

  /* A keyword searched for at the start of lines.  An occurrence is
     not copied; instead SAW_KEYWORD is set.  */
  const char *keyword;
  /* How far into KEYWORD the current partial match has progressed.  */
  const char *cur_keyword_ptr;
  /* Nonzero once a complete occurrence of KEYWORD has been seen.  */
  int saw_keyword;
};
290 /* Output CH to the file or buffer in STATE. Any pending newlines or
291 spaces are output first. */
293 static inline void
294 put_char (int ch, struct rcsoc_state *state)
296 int out_ch;
299 if (state->pending_newlines > 0)
301 state->pending_newlines--;
302 out_ch = '\n';
304 else if (state->pending_spaces > 0)
306 state->pending_spaces--;
307 out_ch = ' ';
309 else
310 out_ch = ch;
312 if (state->out_file)
313 putc (out_ch, state->out_file);
314 if (state->buf_ptr)
315 *state->buf_ptr++ = out_ch;
317 while (out_ch != ch);
320 /* If in the middle of scanning a keyword, continue scanning with
321 character CH, otherwise output CH to the file or buffer in STATE.
322 Any pending newlines or spaces are output first, as well as any
323 previously scanned characters that were thought to be part of a
324 keyword, but were in fact not. */
326 static void
327 scan_keyword_or_put_char (int ch, struct rcsoc_state *state)
329 if (state->keyword
330 && *state->cur_keyword_ptr == ch
331 && (state->cur_keyword_ptr > state->keyword
332 || state->pending_newlines > 0))
333 /* We might be looking at STATE->keyword at some point.
334 Keep looking until we know for sure. */
336 if (*++state->cur_keyword_ptr == '\0')
337 /* Saw the whole keyword. Set SAW_KEYWORD flag to true. */
339 state->saw_keyword = 1;
341 /* Reset the scanning pointer. */
342 state->cur_keyword_ptr = state->keyword;
344 /* Canonicalize whitespace preceding a usage string. */
345 state->pending_newlines = 2;
346 state->pending_spaces = 0;
348 /* Skip any whitespace between the keyword and the
349 usage string. */
351 ch = getc (state->in_file);
352 while (ch == ' ' || ch == '\n');
354 /* Output the open-paren we just read. */
355 put_char (ch, state);
357 /* Skip the function name and replace it with `fn'. */
359 ch = getc (state->in_file);
360 while (ch != ' ' && ch != ')');
361 put_char ('f', state);
362 put_char ('n', state);
364 /* Put back the last character. */
365 ungetc (ch, state->in_file);
368 else
370 if (state->keyword && state->cur_keyword_ptr > state->keyword)
371 /* We scanned the beginning of a potential usage
372 keyword, but it was a false alarm. Output the
373 part we scanned. */
375 const char *p;
377 for (p = state->keyword; p < state->cur_keyword_ptr; p++)
378 put_char (*p, state);
380 state->cur_keyword_ptr = state->keyword;
383 put_char (ch, state);
388 /* Skip a C string or C-style comment from INFILE, and return the
389 character that follows. COMMENT non-zero means skip a comment. If
390 PRINTFLAG is positive, output string contents to outfile. If it is
391 negative, store contents in buf. Convert escape sequences \n and
392 \t to newline and tab; discard \ followed by newline.
393 If SAW_USAGE is non-zero, then any occurrences of the string `usage:'
394 at the beginning of a line will be removed, and *SAW_USAGE set to
395 true if any were encountered. */
397 static int
398 read_c_string_or_comment (FILE *infile, int printflag, int comment, int *saw_usage)
400 register int c;
401 struct rcsoc_state state;
403 state.in_file = infile;
404 state.buf_ptr = (printflag < 0 ? input_buffer : 0);
405 state.out_file = (printflag > 0 ? outfile : 0);
406 state.pending_spaces = 0;
407 state.pending_newlines = 0;
408 state.keyword = (saw_usage ? "usage:" : 0);
409 state.cur_keyword_ptr = state.keyword;
410 state.saw_keyword = 0;
412 c = getc (infile);
413 if (comment)
414 while (c == '\n' || c == '\r' || c == '\t' || c == ' ')
415 c = getc (infile);
417 while (c != EOF)
419 while (c != EOF && (comment ? c != '*' : c != '"'))
421 if (c == '\\')
423 c = getc (infile);
424 if (c == '\n' || c == '\r')
426 c = getc (infile);
427 continue;
429 if (c == 'n')
430 c = '\n';
431 if (c == 't')
432 c = '\t';
435 if (c == ' ')
436 state.pending_spaces++;
437 else if (c == '\n')
439 state.pending_newlines++;
440 state.pending_spaces = 0;
442 else
443 scan_keyword_or_put_char (c, &state);
445 c = getc (infile);
448 if (c != EOF)
449 c = getc (infile);
451 if (comment)
453 if (c == '/')
455 c = getc (infile);
456 break;
459 scan_keyword_or_put_char ('*', &state);
461 else
463 if (c != '"')
464 break;
466 /* If we had a "", concatenate the two strings. */
467 c = getc (infile);
471 if (printflag < 0)
472 *state.buf_ptr = 0;
474 if (saw_usage)
475 *saw_usage = state.saw_keyword;
477 return c;
/* Write to OUT a Lisp-style usage form `(fn ARG...)' for the function
   FUNC, whose C parameter list text is in BUF (with or without its
   leading open-paren).

   For each parameter, the last identifier before the `,' or `)' is
   taken as its name.  Names are upcased and `_' becomes `-'; the name
   `defalt' is written as DEFAULT.  `&optional' is inserted before the
   first argument written once MINARGS arguments have been written,
   while MAXARGS is still positive.  A parameter list consisting of
   `void' produces no arguments, and an empty one is reported through
   `error'.  */

static void
write_c_args (FILE *out, char *func, char *buf, int minargs, int maxargs)
{
  char *p;
  int in_ident = 0;
  char *ident_start = NULL;
  size_t ident_length = 0;

  fprintf (out, "(fn");

  if (*buf == '(')
    ++buf;

  for (p = buf; *p; p++)
    {
      char c = *p;
      int ident_char = (('A' <= c && c <= 'Z')
                        || ('a' <= c && c <= 'z')
                        || ('0' <= c && c <= '9')
                        || c == '_');

      /* Notice when an identifier starts or ends.  */
      if (ident_char != in_ident)
        {
          if (!in_ident)
            {
              in_ident = 1;
              ident_start = p;
            }
          else
            {
              in_ident = 0;
              ident_length = p - ident_start;
            }
        }

      /* Found the end of an argument, write out the last seen
         identifier.  */
      if (c == ',' || c == ')')
        {
          size_t k;

          if (ident_length == 0)
            {
              error ("empty arg list for `%s' should be (void), not ()", func);
              continue;
            }

          /* Only the exact word `void' means "no arguments".  Comparing
             just IDENT_LENGTH characters would also discard arguments
             named `v', `vo' or `voi'.  */
          if (ident_length == 4 && strncmp (ident_start, "void", 4) == 0)
            continue;

          putc (' ', out);

          if (minargs == 0 && maxargs > 0)
            fprintf (out, "&optional ");

          minargs--;
          maxargs--;

          /* In C code, `default' is a reserved word, so we spell it
             `defalt'; demangle that here.  */
          if (ident_length == 6 && strncmp (ident_start, "defalt", 6) == 0)
            fprintf (out, "DEFAULT");
          else
            for (k = 0; k < ident_length; k++)
              {
                c = ident_start[k];
                if (c >= 'a' && c <= 'z')
                  /* Upcase the letter.  */
                  c += 'A' - 'a';
                else if (c == '_')
                  /* Print underscore as hyphen.  */
                  c = '-';
                putc (c, out);
              }

          /* This identifier has been consumed.  Forget it so that a
             following separator with no identifier of its own is
             reported instead of re-reading stale data.  */
          ident_length = 0;
        }
    }

  putc (')', out);
}
/* The kinds of global variable that can be recorded; INVALID marks
   anything that is not one of the recognized kinds.  */
enum global_type
{
  EMACS_INTEGER,
  BOOLEAN,
  LISP_OBJECT,
  INVALID
};
573 /* A single global. */
574 struct global
576 enum global_type type;
577 char *name;
580 /* All the variable names we saw while scanning C sources in `-g'
581 mode. */
582 int num_globals;
583 int num_globals_allocated;
584 struct global *globals;
586 static void
587 add_global (enum global_type type, char *name)
589 /* Ignore the one non-symbol that can occur. */
590 if (strcmp (name, "..."))
592 ++num_globals;
594 if (num_globals_allocated == 0)
596 num_globals_allocated = 100;
597 globals = xmalloc (num_globals_allocated * sizeof (struct global));
599 else if (num_globals == num_globals_allocated)
601 num_globals_allocated *= 2;
602 globals = xrealloc (globals,
603 num_globals_allocated * sizeof (struct global));
606 globals[num_globals - 1].type = type;
607 globals[num_globals - 1].name = name;
611 static int
612 compare_globals (const void *a, const void *b)
614 const struct global *ga = a;
615 const struct global *gb = b;
616 return strcmp (ga->name, gb->name);
619 static void
620 write_globals (void)
622 int i;
623 qsort (globals, num_globals, sizeof (struct global), compare_globals);
624 for (i = 0; i < num_globals; ++i)
626 char const *type;
628 switch (globals[i].type)
630 case EMACS_INTEGER:
631 type = "EMACS_INT";
632 break;
633 case BOOLEAN:
634 type = "int";
635 break;
636 case LISP_OBJECT:
637 type = "Lisp_Object";
638 break;
639 default:
640 fatal ("not a recognized DEFVAR_", 0);
643 fprintf (outfile, " %s f_%s;\n", type, globals[i].name);
644 fprintf (outfile, "#define %s globals.f_%s\n",
645 globals[i].name, globals[i].name);
646 while (i + 1 < num_globals
647 && !strcmp (globals[i].name, globals[i + 1].name))
648 ++i;
651 fprintf (outfile, "};\n");
652 fprintf (outfile, "extern struct emacs_globals globals;\n");
656 /* Read through a c file. If a .o file is named,
657 the corresponding .c or .m file is read instead.
658 Looks for DEFUN constructs such as are defined in ../src/lisp.h.
659 Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED. */
661 static int
662 scan_c_file (char *filename, const char *mode)
664 FILE *infile;
665 register int c;
666 register int commas;
667 int minargs, maxargs;
668 int extension = filename[strlen (filename) - 1];
670 if (extension == 'o')
671 filename[strlen (filename) - 1] = 'c';
673 infile = fopen (filename, mode);
675 if (infile == NULL && extension == 'o')
677 /* Try .m. */
678 filename[strlen (filename) - 1] = 'm';
679 infile = fopen (filename, mode);
680 if (infile == NULL)
681 filename[strlen (filename) - 1] = 'c'; /* Don't confuse people. */
684 /* No error if non-ex input file. */
685 if (infile == NULL)
687 perror (filename);
688 return 0;
691 /* Reset extension to be able to detect duplicate files. */
692 filename[strlen (filename) - 1] = extension;
694 c = '\n';
695 while (!feof (infile))
697 int doc_keyword = 0;
698 int defunflag = 0;
699 int defvarperbufferflag = 0;
700 int defvarflag = 0;
701 enum global_type type = INVALID;
703 if (c != '\n' && c != '\r')
705 c = getc (infile);
706 continue;
708 c = getc (infile);
709 if (c == ' ')
711 while (c == ' ')
712 c = getc (infile);
713 if (c != 'D')
714 continue;
715 c = getc (infile);
716 if (c != 'E')
717 continue;
718 c = getc (infile);
719 if (c != 'F')
720 continue;
721 c = getc (infile);
722 if (c != 'V')
723 continue;
724 c = getc (infile);
725 if (c != 'A')
726 continue;
727 c = getc (infile);
728 if (c != 'R')
729 continue;
730 c = getc (infile);
731 if (c != '_')
732 continue;
734 defvarflag = 1;
736 c = getc (infile);
737 defvarperbufferflag = (c == 'P');
738 if (generate_globals)
740 if (c == 'I')
741 type = EMACS_INTEGER;
742 else if (c == 'L')
743 type = LISP_OBJECT;
744 else if (c == 'B')
745 type = BOOLEAN;
748 c = getc (infile);
749 /* We need to distinguish between DEFVAR_BOOL and
750 DEFVAR_BUFFER_DEFAULTS. */
751 if (generate_globals && type == BOOLEAN && c != 'O')
752 type = INVALID;
754 else if (c == 'D')
756 c = getc (infile);
757 if (c != 'E')
758 continue;
759 c = getc (infile);
760 if (c != 'F')
761 continue;
762 c = getc (infile);
763 defunflag = c == 'U';
765 else continue;
767 if (generate_globals && (!defvarflag || defvarperbufferflag
768 || type == INVALID))
769 continue;
771 while (c != '(')
773 if (c < 0)
774 goto eof;
775 c = getc (infile);
778 /* Lisp variable or function name. */
779 c = getc (infile);
780 if (c != '"')
781 continue;
782 c = read_c_string_or_comment (infile, -1, 0, 0);
784 if (generate_globals)
786 int i = 0;
787 char *name;
789 /* Skip "," and whitespace. */
792 c = getc (infile);
794 while (c == ',' || c == ' ' || c == '\t' || c == '\n' || c == '\r');
796 /* Read in the identifier. */
799 input_buffer[i++] = c;
800 c = getc (infile);
802 while (! (c == ',' || c == ' ' || c == '\t'
803 || c == '\n' || c == '\r'));
804 input_buffer[i] = '\0';
806 name = xmalloc (i + 1);
807 memcpy (name, input_buffer, i + 1);
808 add_global (type, name);
809 continue;
812 /* DEFVAR_LISP ("name", addr, "doc")
813 DEFVAR_LISP ("name", addr /\* doc *\/)
814 DEFVAR_LISP ("name", addr, doc: /\* doc *\/) */
816 if (defunflag)
817 commas = 5;
818 else if (defvarperbufferflag)
819 commas = 3;
820 else if (defvarflag)
821 commas = 1;
822 else /* For DEFSIMPLE and DEFPRED. */
823 commas = 2;
825 while (commas)
827 if (c == ',')
829 commas--;
831 if (defunflag && (commas == 1 || commas == 2))
833 int scanned = 0;
835 c = getc (infile);
836 while (c == ' ' || c == '\n' || c == '\r' || c == '\t');
837 if (c < 0)
838 goto eof;
839 ungetc (c, infile);
840 if (commas == 2) /* Pick up minargs. */
841 scanned = fscanf (infile, "%d", &minargs);
842 else /* Pick up maxargs. */
843 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
844 maxargs = -1;
845 else
846 scanned = fscanf (infile, "%d", &maxargs);
847 if (scanned < 0)
848 goto eof;
852 if (c == EOF)
853 goto eof;
854 c = getc (infile);
857 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
858 c = getc (infile);
860 if (c == '"')
861 c = read_c_string_or_comment (infile, 0, 0, 0);
863 while (c != EOF && c != ',' && c != '/')
864 c = getc (infile);
865 if (c == ',')
867 c = getc (infile);
868 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
869 c = getc (infile);
870 while ((c >= 'a' && c <= 'z') || (c >= 'Z' && c <= 'Z'))
871 c = getc (infile);
872 if (c == ':')
874 doc_keyword = 1;
875 c = getc (infile);
876 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
877 c = getc (infile);
881 if (c == '"'
882 || (c == '/'
883 && (c = getc (infile),
884 ungetc (c, infile),
885 c == '*')))
887 int comment = c != '"';
888 int saw_usage;
890 putc (037, outfile);
891 putc (defvarflag ? 'V' : 'F', outfile);
892 fprintf (outfile, "%s\n", input_buffer);
894 if (comment)
895 getc (infile); /* Skip past `*'. */
896 c = read_c_string_or_comment (infile, 1, comment, &saw_usage);
898 /* If this is a defun, find the arguments and print them. If
899 this function takes MANY or UNEVALLED args, then the C source
900 won't give the names of the arguments, so we shouldn't bother
901 trying to find them.
903 Various doc-string styles:
904 0: DEFUN (..., "DOC") (args) [!comment]
905 1: DEFUN (..., /\* DOC *\/ (args)) [comment && !doc_keyword]
906 2: DEFUN (..., doc: /\* DOC *\/) (args) [comment && doc_keyword]
908 if (defunflag && maxargs != -1 && !saw_usage)
910 char argbuf[1024], *p = argbuf;
912 if (!comment || doc_keyword)
913 while (c != ')')
915 if (c < 0)
916 goto eof;
917 c = getc (infile);
920 /* Skip into arguments. */
921 while (c != '(')
923 if (c < 0)
924 goto eof;
925 c = getc (infile);
927 /* Copy arguments into ARGBUF. */
928 *p++ = c;
930 *p++ = c = getc (infile);
931 while (c != ')');
932 *p = '\0';
933 /* Output them. */
934 fprintf (outfile, "\n\n");
935 write_c_args (outfile, input_buffer, argbuf, minargs, maxargs);
937 else if (defunflag && maxargs == -1 && !saw_usage)
938 /* The DOC should provide the usage form. */
939 fprintf (stderr, "Missing `usage' for function `%s'.\n",
940 input_buffer);
943 eof:
944 fclose (infile);
945 return 0;
/* Read a file of Lisp code, compiled or interpreted.
 Looks for
  (defun NAME ARGS DOCSTRING ...)
  (defmacro NAME ARGS DOCSTRING ...)
  (defsubst NAME ARGS DOCSTRING ...)
  (autoload (quote NAME) FILE DOCSTRING ...)
  (defvar NAME VALUE DOCSTRING)
  (defconst NAME VALUE DOCSTRING)
  (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
  (fset (quote NAME) #[... DOCSTRING ...])
  (defalias (quote NAME) #[... DOCSTRING ...])
  (custom-declare-variable (quote NAME) VALUE DOCSTRING ...)
 starting in column zero.
 (quote NAME) may appear as 'NAME as well.

 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
 When we find that, we save it for the following defining-form,
 and we use that instead of reading a doc string within that defining-form.

 For defvar, defconst, and fset we skip to the docstring with a kludgy
 formatting convention: all docstrings must appear on the same line as the
 initial open-paren (the one in column zero) and must contain a backslash
 and a newline immediately after the initial double-quote.  No newlines
 must appear between the beginning of the form and the first double-quote.
 For defun, defmacro, and autoload, we know how to skip over the
 arglist, but the doc string must still have a backslash and newline
 immediately after the double quote.
 The only source files that must follow this convention are preloaded
 uncompiled ones like loaddefs.el and bindings.el; aside
 from that, it is always the .elc file that we look at, and they are no
 problem because byte-compiler output follows this convention.
 The NAME and DOCSTRING are output.
 NAME is preceded by `F' for a function or `V' for a variable.
 An entry is output only if DOCSTRING has \ newline just after the opening ".
 */
/* Advance INFILE past any run of spaces, tabs, newlines and carriage
   returns, leaving the first other character unread.

   The character is kept in an int, as getc returns it, so that the
   byte 0xFF is not confused with EOF and nothing bogus is pushed back
   when the end of the file is reached.  */

static void
skip_white (FILE *infile)
{
  int c;

  do
    c = getc (infile);
  while (c == ' ' || c == '\t' || c == '\n' || c == '\r');

  if (c != EOF)
    ungetc (c, infile);
}
/* Read a Lisp symbol from INFILE into BUFFER as a NUL-terminated
   string, skipping whitespace before and after it.

   The symbol ends at whitespace, at `(' or `)', or at end of file; the
   terminating character is left unread.  A backslash makes the
   following character part of the symbol.  If no symbol characters are
   found, a diagnostic is printed on stderr.

   BUFFER carries no size, so the caller must supply one large enough
   for the longest symbol in the input.  */

static void
read_lisp_symbol (FILE *infile, char *buffer)
{
  int c;
  char *fillp = buffer;

  skip_white (infile);
  for (;;)
    {
      c = getc (infile);
      if (c == '\\')
        {
          /* Store the escaped character in the current slot.  */
          c = getc (infile);
          if (c == EOF)
            {
              *fillp = 0;
              break;
            }
          *fillp++ = c;
        }
      else if (c == ' ' || c == '\t' || c == '\n' || c == '\r'
               || c == '(' || c == ')' || c == EOF)
        {
          if (c != EOF)
            ungetc (c, infile);
          *fillp = 0;
          break;
        }
      else
        *fillp++ = c;
    }

  if (! buffer[0])
    fprintf (stderr, "## expected a symbol, got '%c'\n", c);

  skip_white (infile);
}
/* Read the rest of the current line of INFILE and report whether it
   ends with the doc-string opener, a double quote followed by a
   backslash and then the line break.

   Return 1 if so; the line break has then been consumed.  Otherwise
   return 0 and push the line break back (nothing is pushed back at end
   of file).

   Characters are kept in ints, as getc returns them, so that the byte
   0xFF is not mistaken for EOF and EOF is always recognized.  */

static int
search_lisp_doc_at_eol (FILE *infile)
{
  int c = 0, c1 = 0, c2 = 0;

  /* Skip until the end of line; remember two previous chars.  */
  while (c != '\n' && c != '\r' && c != EOF)
    {
      c2 = c1;
      c1 = c;
      c = getc (infile);
    }

  /* If two previous characters were " and \,
     this is a doc string.  Otherwise, there is none.  */
  if (c2 != '"' || c1 != '\\')
    {
#ifdef DEBUG
      fprintf (stderr, "## non-docstring found\n");
#endif
      if (c != EOF)
        ungetc (c, infile);
      return 0;
    }
  return 1;
}
1049 static int
1050 scan_lisp_file (const char *filename, const char *mode)
1052 FILE *infile;
1053 register int c;
1054 char *saved_string = 0;
1056 if (generate_globals)
1057 fatal ("scanning lisp file when -g specified", 0);
1059 infile = fopen (filename, mode);
1060 if (infile == NULL)
1062 perror (filename);
1063 return 0; /* No error. */
1066 c = '\n';
1067 while (!feof (infile))
1069 char buffer[BUFSIZ];
1070 char type;
1072 /* If not at end of line, skip till we get to one. */
1073 if (c != '\n' && c != '\r')
1075 c = getc (infile);
1076 continue;
1078 /* Skip the line break. */
1079 while (c == '\n' || c == '\r')
1080 c = getc (infile);
1081 /* Detect a dynamic doc string and save it for the next expression. */
1082 if (c == '#')
1084 c = getc (infile);
1085 if (c == '@')
1087 size_t length = 0;
1088 size_t i;
1090 /* Read the length. */
1091 while ((c = getc (infile),
1092 c >= '0' && c <= '9'))
1094 length *= 10;
1095 length += c - '0';
1098 if (length <= 1)
1099 fatal ("invalid dynamic doc string length", "");
1101 if (c != ' ')
1102 fatal ("space not found after dynamic doc string length", "");
1104 /* The next character is a space that is counted in the length
1105 but not part of the doc string.
1106 We already read it, so just ignore it. */
1107 length--;
1109 /* Read in the contents. */
1110 free (saved_string);
1111 saved_string = (char *) xmalloc (length);
1112 for (i = 0; i < length; i++)
1113 saved_string[i] = getc (infile);
1114 /* The last character is a ^_.
1115 That is needed in the .elc file
1116 but it is redundant in DOC. So get rid of it here. */
1117 saved_string[length - 1] = 0;
1118 /* Skip the line break. */
1119 while (c == '\n' || c == '\r')
1120 c = getc (infile);
1121 /* Skip the following line. */
1122 while (c != '\n' && c != '\r')
1123 c = getc (infile);
1125 continue;
1128 if (c != '(')
1129 continue;
1131 read_lisp_symbol (infile, buffer);
1133 if (! strcmp (buffer, "defun")
1134 || ! strcmp (buffer, "defmacro")
1135 || ! strcmp (buffer, "defsubst"))
1137 type = 'F';
1138 read_lisp_symbol (infile, buffer);
1140 /* Skip the arguments: either "nil" or a list in parens. */
1142 c = getc (infile);
1143 if (c == 'n') /* nil */
1145 if ((c = getc (infile)) != 'i'
1146 || (c = getc (infile)) != 'l')
1148 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1149 buffer, filename);
1150 continue;
1153 else if (c != '(')
1155 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1156 buffer, filename);
1157 continue;
1159 else
1160 while (c != ')')
1161 c = getc (infile);
1162 skip_white (infile);
1164 /* If the next three characters aren't `dquote bslash newline'
1165 then we're not reading a docstring.
1167 if ((c = getc (infile)) != '"'
1168 || (c = getc (infile)) != '\\'
1169 || ((c = getc (infile)) != '\n' && c != '\r'))
1171 #ifdef DEBUG
1172 fprintf (stderr, "## non-docstring in %s (%s)\n",
1173 buffer, filename);
1174 #endif
1175 continue;
1179 /* defcustom can only occur in uncompiled Lisp files. */
1180 else if (! strcmp (buffer, "defvar")
1181 || ! strcmp (buffer, "defconst")
1182 || ! strcmp (buffer, "defcustom"))
1184 type = 'V';
1185 read_lisp_symbol (infile, buffer);
1187 if (saved_string == 0)
1188 if (!search_lisp_doc_at_eol (infile))
1189 continue;
1192 else if (! strcmp (buffer, "custom-declare-variable")
1193 || ! strcmp (buffer, "defvaralias")
1196 type = 'V';
1198 c = getc (infile);
1199 if (c == '\'')
1200 read_lisp_symbol (infile, buffer);
1201 else
1203 if (c != '(')
1205 fprintf (stderr,
1206 "## unparsable name in custom-declare-variable in %s\n",
1207 filename);
1208 continue;
1210 read_lisp_symbol (infile, buffer);
1211 if (strcmp (buffer, "quote"))
1213 fprintf (stderr,
1214 "## unparsable name in custom-declare-variable in %s\n",
1215 filename);
1216 continue;
1218 read_lisp_symbol (infile, buffer);
1219 c = getc (infile);
1220 if (c != ')')
1222 fprintf (stderr,
1223 "## unparsable quoted name in custom-declare-variable in %s\n",
1224 filename);
1225 continue;
1229 if (saved_string == 0)
1230 if (!search_lisp_doc_at_eol (infile))
1231 continue;
1234 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
1236 type = 'F';
1238 c = getc (infile);
1239 if (c == '\'')
1240 read_lisp_symbol (infile, buffer);
1241 else
1243 if (c != '(')
1245 fprintf (stderr, "## unparsable name in fset in %s\n",
1246 filename);
1247 continue;
1249 read_lisp_symbol (infile, buffer);
1250 if (strcmp (buffer, "quote"))
1252 fprintf (stderr, "## unparsable name in fset in %s\n",
1253 filename);
1254 continue;
1256 read_lisp_symbol (infile, buffer);
1257 c = getc (infile);
1258 if (c != ')')
1260 fprintf (stderr,
1261 "## unparsable quoted name in fset in %s\n",
1262 filename);
1263 continue;
1267 if (saved_string == 0)
1268 if (!search_lisp_doc_at_eol (infile))
1269 continue;
1272 else if (! strcmp (buffer, "autoload"))
1274 type = 'F';
1275 c = getc (infile);
1276 if (c == '\'')
1277 read_lisp_symbol (infile, buffer);
1278 else
1280 if (c != '(')
1282 fprintf (stderr, "## unparsable name in autoload in %s\n",
1283 filename);
1284 continue;
1286 read_lisp_symbol (infile, buffer);
1287 if (strcmp (buffer, "quote"))
1289 fprintf (stderr, "## unparsable name in autoload in %s\n",
1290 filename);
1291 continue;
1293 read_lisp_symbol (infile, buffer);
1294 c = getc (infile);
1295 if (c != ')')
1297 fprintf (stderr,
1298 "## unparsable quoted name in autoload in %s\n",
1299 filename);
1300 continue;
1303 skip_white (infile);
1304 if ((c = getc (infile)) != '\"')
1306 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
1307 buffer, filename);
1308 continue;
1310 read_c_string_or_comment (infile, 0, 0, 0);
1312 if (saved_string == 0)
1313 if (!search_lisp_doc_at_eol (infile))
1314 continue;
1317 #ifdef DEBUG
1318 else if (! strcmp (buffer, "if")
1319 || ! strcmp (buffer, "byte-code"))
1320 continue;
1321 #endif
1323 else
1325 #ifdef DEBUG
1326 fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
1327 buffer, filename);
1328 #endif
1329 continue;
1332 /* At this point, we should either use the previous dynamic doc string in
1333 saved_string or gobble a doc string from the input file.
1334 In the latter case, the opening quote (and leading backslash-newline)
1335 have already been read. */
1337 putc (037, outfile);
1338 putc (type, outfile);
1339 fprintf (outfile, "%s\n", buffer);
1340 if (saved_string)
1342 fputs (saved_string, outfile);
1343 /* Don't use one dynamic doc string twice. */
1344 free (saved_string);
1345 saved_string = 0;
1347 else
1348 read_c_string_or_comment (infile, 1, 0, 0);
1350 fclose (infile);
1351 return 0;
1355 /* make-docfile.c ends here */