1 /* Generate doc-string file for GNU Emacs from source files.
2 Copyright (C) 1985, 86, 92, 93, 94, 97, 1999, 2000, 01, 2004
3 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* The arguments given to this program are all the C and Lisp source files
23 of GNU Emacs. .elc and .el and .c files are allowed.
24 A .o file can also be specified; the .c file it was made from is used.
25 This helps the makefile pass the correct list of files.
26 Option -d DIR means change to DIR before looking for files.
28 The results, which go to standard output or to a file
29 specified with -a or -o (-a to append, -o to start from nothing),
30 are entries containing function or variable names and their documentation.
31 Each entry starts with a ^_ character.
32 Then comes F for a function or V for a variable.
33 Then comes the function or variable name, terminated with a newline.
34 Then comes the documentation for that function or variable. */
37 #define NO_SHORTNAMES /* Tell config not to load remap.h */
40 /* defined to be emacs_main, sys_fopen, etc. in config.h */
53 #endif /* WINDOWSNT */
56 #define READ_TEXT "rt"
57 #define READ_BINARY "rb"
58 #else /* not DOS_NT */
60 #define READ_BINARY "r"
61 #endif /* not DOS_NT */
64 int scan_lisp_file ();
68 /* s/msdos.h defines this as sys_chdir, but we're not linking with the
69 file where that function is defined. */
77 /* Stdio stream for output to the DOC file. */
80 /* Name this program was invoked with. */
83 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
90 fprintf (stderr
, "%s: ", progname
);
91 fprintf (stderr
, s1
, s2
);
92 fprintf (stderr
, "\n");
95 /* Print error message and exit. */
106 /* Like malloc but get fatal error if memory is exhausted. */
112 void *result
= (void *) malloc (size
);
114 fatal ("virtual memory exhausted", 0);
131 /* Don't put CRs in the DOC file. */
134 #if 0 /* Suspicion is that this causes hanging.
135 So instead we require people to use -o on MSDOS. */
136 (stdout
)->_flag
&= ~_IOTEXT
;
137 _setmode (fileno (stdout
), O_BINARY
);
143 _setmode (fileno (stdout
), O_BINARY
);
144 #endif /* WINDOWSNT */
146 /* If first two args are -o FILE, output to FILE. */
148 if (argc
> i
+ 1 && !strcmp (argv
[i
], "-o"))
150 outfile
= fopen (argv
[i
+ 1], "w");
153 if (argc
> i
+ 1 && !strcmp (argv
[i
], "-a"))
155 outfile
= fopen (argv
[i
+ 1], "a");
158 if (argc
> i
+ 1 && !strcmp (argv
[i
], "-d"))
165 fatal ("No output file specified", "");
168 for (; i
< argc
; i
++)
171 /* Don't process one file twice. */
172 for (j
= first_infile
; j
< i
; j
++)
173 if (! strcmp (argv
[i
], argv
[j
]))
176 err_count
+= scan_file (argv
[i
]);
178 return (err_count
> 0 ? EXIT_FAILURE
: EXIT_SUCCESS
);
181 /* Add a source file name boundary marker in the output file. */
183 put_filename (filename
)
186 char *tmp
= filename
;
189 while ((tmp
= index (filename
, '/')))
194 fprintf (outfile
, "%s\n", filename
);
197 /* Read file FILENAME and output its doc strings to outfile. */
198 /* Return 1 if file is not found, 0 if it is found. */
204 int len
= strlen (filename
);
206 put_filename (filename
);
207 if (len
> 4 && !strcmp (filename
+ len
- 4, ".elc"))
208 return scan_lisp_file (filename
, READ_BINARY
);
209 else if (len
> 3 && !strcmp (filename
+ len
- 3, ".el"))
210 return scan_lisp_file (filename
, READ_TEXT
);
212 return scan_c_file (filename
, READ_TEXT
);
217 /* Some state during the execution of `read_c_string_or_comment'. */
220 /* A count of spaces and newlines that have been read, but not output. */
221 unsigned pending_spaces
, pending_newlines
;
223 /* Where we're reading from. */
226 /* If non-zero, a buffer into which to copy characters. */
228 /* If non-zero, a file into which to copy characters. */
231 /* A keyword we look for at the beginning of lines. If found, it is
232 not copied, and SAW_KEYWORD is set to true. */
234 /* The current point we've reached in an occurrence of KEYWORD in the file. */
236 char *cur_keyword_ptr
;
237 /* Set to true if we saw an occurrence of KEYWORD. */
241 /* Output CH to the file or buffer in STATE. Any pending newlines or
242 spaces are output first. */
247 struct rcsoc_state
*state
;
252 if (state
->pending_newlines
> 0)
254 state
->pending_newlines
--;
257 else if (state
->pending_spaces
> 0)
259 state
->pending_spaces
--;
266 putc (out_ch
, state
->out_file
);
268 *state
->buf_ptr
++ = out_ch
;
270 while (out_ch
!= ch
);
273 /* If in the middle of scanning a keyword, continue scanning with
274 character CH, otherwise output CH to the file or buffer in STATE.
275 Any pending newlines or spaces are output first, as well as any
276 previously scanned characters that were thought to be part of a
277 keyword, but were in fact not. */
280 scan_keyword_or_put_char (ch
, state
)
282 struct rcsoc_state
*state
;
285 && *state
->cur_keyword_ptr
== ch
286 && (state
->cur_keyword_ptr
> state
->keyword
287 || state
->pending_newlines
> 0))
288 /* We might be looking at STATE->keyword at some point.
289 Keep looking until we know for sure. */
291 if (*++state
->cur_keyword_ptr
== '\0')
292 /* Saw the whole keyword. Set SAW_KEYWORD flag to true. */
294 state
->saw_keyword
= 1;
296 /* Reset the scanning pointer. */
297 state
->cur_keyword_ptr
= state
->keyword
;
299 /* Canonicalize whitespace preceding a usage string. */
300 state
->pending_newlines
= 2;
301 state
->pending_spaces
= 0;
303 /* Skip any whitespace between the keyword and the usage string. */
306 ch
= getc (state
->in_file
);
307 while (ch
== ' ' || ch
== '\n');
309 /* Output the open-paren we just read. */
310 put_char (ch
, state
);
312 /* Skip the function name and replace it with `fn'. */
314 ch
= getc (state
->in_file
);
315 while (ch
!= ' ' && ch
!= ')');
316 put_char ('f', state
);
317 put_char ('n', state
);
319 /* Put back the last character. */
320 ungetc (ch
, state
->in_file
);
325 if (state
->keyword
&& state
->cur_keyword_ptr
> state
->keyword
)
326 /* We scanned the beginning of a potential usage
327 keyword, but it was a false alarm. Output the part we scanned. */
332 for (p
= state
->keyword
; p
< state
->cur_keyword_ptr
; p
++)
333 put_char (*p
, state
);
335 state
->cur_keyword_ptr
= state
->keyword
;
338 put_char (ch
, state
);
343 /* Skip a C string or C-style comment from INFILE, and return the
344 character that follows. COMMENT non-zero means skip a comment. If
345 PRINTFLAG is positive, output string contents to outfile. If it is
346 negative, store contents in buf. Convert escape sequences \n and
347 \t to newline and tab; discard \ followed by newline.
348 If SAW_USAGE is non-zero, then any occurrences of the string `usage:'
349 at the beginning of a line will be removed, and *SAW_USAGE set to
350 true if any were encountered. */
353 read_c_string_or_comment (infile
, printflag
, comment
, saw_usage
)
360 struct rcsoc_state state
;
362 state
.in_file
= infile
;
363 state
.buf_ptr
= (printflag
< 0 ? buf
: 0);
364 state
.out_file
= (printflag
> 0 ? outfile
: 0);
365 state
.pending_spaces
= 0;
366 state
.pending_newlines
= 0;
367 state
.keyword
= (saw_usage
? "usage:" : 0);
368 state
.cur_keyword_ptr
= state
.keyword
;
369 state
.saw_keyword
= 0;
373 while (c
== '\n' || c
== '\r' || c
== '\t' || c
== ' ')
378 while (c
!= EOF
&& (comment
? c
!= '*' : c
!= '"'))
383 if (c
== '\n' || c
== '\r')
395 state
.pending_spaces
++;
398 state
.pending_newlines
++;
399 state
.pending_spaces
= 0;
402 scan_keyword_or_put_char (c
, &state
);
418 scan_keyword_or_put_char ('*', &state
);
425 /* If we had a "", concatenate the two strings. */
434 *saw_usage
= state
.saw_keyword
;
441 /* Write to file OUT the argument names of function FUNC, whose text is in BUF.
442 MINARGS and MAXARGS are the minimum and maximum number of arguments. */
445 write_c_args (out
, func
, buf
, minargs
, maxargs
)
448 int minargs
, maxargs
;
455 fprintf (out
, "(fn");
460 for (p
= buf
; *p
; p
++)
465 /* Notice when we start printing a new identifier. */
466 if ((('A' <= c
&& c
<= 'Z')
467 || ('a' <= c
&& c
<= 'z')
468 || ('0' <= c
&& c
<= '9')
480 if (minargs
== 0 && maxargs
> 0)
481 fprintf (out
, "&optional ");
491 /* Print the C argument list as it would appear in lisp:
492 print underscores as hyphens, and print commas and newlines
493 as spaces. Collapse adjacent spaces into one. */
496 else if (c
== ',' || c
== '\n')
499 /* In C code, `default' is a reserved word, so we spell it
500 `defalt'; unmangle that here. */
502 && strncmp (p
, "defalt", 6) == 0
503 && ! (('A' <= p
[6] && p
[6] <= 'Z')
504 || ('a' <= p
[6] && p
[6] <= 'z')
505 || ('0' <= p
[6] && p
[6] <= '9')
508 fprintf (out
, "DEFAULT");
513 else if (c
!= ' ' || !just_spaced
)
515 if (c
>= 'a' && c
<= 'z')
516 /* Upcase the letter. */
521 just_spaced
= c
== ' ';
526 /* Read through a c file. If a .o file is named,
527 the corresponding .c file is read instead.
528 Looks for DEFUN constructs such as are defined in ../src/lisp.h.
529 Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED. */
532 scan_c_file (filename
, mode
)
533 char *filename
, *mode
;
538 register int defunflag
;
539 register int defvarperbufferflag
;
540 register int defvarflag
;
541 int minargs
, maxargs
;
542 int extension
= filename
[strlen (filename
) - 1];
544 if (extension
== 'o')
545 filename
[strlen (filename
) - 1] = 'c';
547 infile
= fopen (filename
, mode
);
549 /* No error if nonexistent input file */
556 /* Reset extension to be able to detect duplicate files. */
557 filename
[strlen (filename
) - 1] = extension
;
560 while (!feof (infile
))
564 if (c
!= '\n' && c
!= '\r')
599 defvarperbufferflag
= (c
== 'P');
612 defunflag
= c
== 'U';
624 /* Lisp variable or function name. */
628 c
= read_c_string_or_comment (infile
, -1, 0, 0);
630 /* DEFVAR_LISP ("name", addr, "doc")
631 DEFVAR_LISP ("name", addr /\* doc *\/)
632 DEFVAR_LISP ("name", addr, doc: /\* doc *\/) */
636 else if (defvarperbufferflag
)
640 else /* For DEFSIMPLE and DEFPRED */
649 if (defunflag
&& (commas
== 1 || commas
== 2))
653 while (c
== ' ' || c
== '\n' || c
== '\r' || c
== '\t');
657 if (commas
== 2) /* pick up minargs */
658 fscanf (infile
, "%d", &minargs
);
659 else /* pick up maxargs */
660 if (c
== 'M' || c
== 'U') /* MANY || UNEVALLED */
663 fscanf (infile
, "%d", &maxargs
);
672 while (c
== ' ' || c
== '\n' || c
== '\r' || c
== '\t')
676 c
= read_c_string_or_comment (infile
, 0, 0, 0);
678 while (c
!= EOF
&& c
!= ',' && c
!= '/')
683 while (c
== ' ' || c
== '\n' || c
== '\r' || c
== '\t')
685 while ((c
>= 'a' && c
<= 'z') || (c
>= 'Z' && c
<= 'Z'))
691 while (c
== ' ' || c
== '\n' || c
== '\r' || c
== '\t')
698 && (c
= getc (infile
),
702 int comment
= c
!= '"';
706 putc (defvarflag
? 'V' : 'F', outfile
);
707 fprintf (outfile
, "%s\n", buf
);
710 getc (infile
); /* Skip past `*' */
711 c
= read_c_string_or_comment (infile
, 1, comment
, &saw_usage
);
713 /* If this is a defun, find the arguments and print them. If
714 this function takes MANY or UNEVALLED args, then the C source
715 won't give the names of the arguments, so we shouldn't bother trying to find them.
718 Various doc-string styles:
719 0: DEFUN (..., "DOC") (args) [!comment]
720 1: DEFUN (..., /\* DOC *\/ (args)) [comment && !doc_keyword]
721 2: DEFUN (..., doc: /\* DOC *\/) (args) [comment && doc_keyword] */
723 if (defunflag
&& maxargs
!= -1 && !saw_usage
)
725 char argbuf
[1024], *p
= argbuf
;
727 if (!comment
|| doc_keyword
)
735 /* Skip into arguments. */
742 /* Copy arguments into ARGBUF. */
745 *p
++ = c
= getc (infile
);
749 fprintf (outfile
, "\n\n");
750 write_c_args (outfile
, buf
, argbuf
, minargs
, maxargs
);
752 else if (defunflag
&& maxargs
== -1 && !saw_usage
)
753 /* The DOC should provide the usage form. */
754 fprintf (stderr
, "Missing `usage' for function `%s'.\n", buf
);
762 /* Read a file of Lisp code, compiled or interpreted. Looks for
764 (defun NAME ARGS DOCSTRING ...)
765 (defmacro NAME ARGS DOCSTRING ...)
766 (defsubst NAME ARGS DOCSTRING ...)
767 (autoload (quote NAME) FILE DOCSTRING ...)
768 (defvar NAME VALUE DOCSTRING)
769 (defconst NAME VALUE DOCSTRING)
770 (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
771 (fset (quote NAME) #[... DOCSTRING ...])
772 (defalias (quote NAME) #[... DOCSTRING ...])
773 (custom-declare-variable (quote NAME) VALUE DOCSTRING ...)
774 starting in column zero.
775 (quote NAME) may appear as 'NAME as well.
777 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
778 When we find that, we save it for the following defining-form,
779 and we use that instead of reading a doc string within that defining-form.
781 For defvar, defconst, and fset we skip to the docstring with a kludgy
782 formatting convention: all docstrings must appear on the same line as the
783 initial open-paren (the one in column zero) and must contain a backslash
784 and a newline immediately after the initial double-quote. No newlines
785 must appear between the beginning of the form and the first double-quote.
786 For defun, defmacro, and autoload, we know how to skip over the
787 arglist, but the doc string must still have a backslash and newline
788 immediately after the double quote.
789 The only source files that must follow this convention are preloaded
790 uncompiled ones like loaddefs.el and bindings.el; aside
791 from that, it is always the .elc file that we look at, and they are no
792 problem because byte-compiler output follows this convention.
793 The NAME and DOCSTRING are output.
794 NAME is preceded by `F' for a function or `V' for a variable.
795 An entry is output only if DOCSTRING has \ newline just after the opening ". */
803 while (c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r')
809 read_lisp_symbol (infile
, buffer
)
814 char *fillp
= buffer
;
821 *(++fillp
) = getc (infile
);
822 else if (c
== ' ' || c
== '\t' || c
== '\n' || c
== '\r' || c
== '(' || c
== ')')
833 fprintf (stderr
, "## expected a symbol, got '%c'\n", c
);
839 scan_lisp_file (filename
, mode
)
840 char *filename
, *mode
;
844 char *saved_string
= 0;
846 infile
= fopen (filename
, mode
);
850 return 0; /* No error */
854 while (!feof (infile
))
859 /* If not at end of line, skip till we get to one. */
860 if (c
!= '\n' && c
!= '\r')
865 /* Skip the line break. */
866 while (c
== '\n' || c
== '\r')
868 /* Detect a dynamic doc string and save it for the next expression. */
877 /* Read the length. */
878 while ((c
= getc (infile
),
879 c
>= '0' && c
<= '9'))
885 /* The next character is a space that is counted in the length
886 but not part of the doc string.
887 We already read it, so just ignore it. */
890 /* Read in the contents. */
891 if (saved_string
!= 0)
893 saved_string
= (char *) malloc (length
);
894 for (i
= 0; i
< length
; i
++)
895 saved_string
[i
] = getc (infile
);
896 /* The last character is a ^_.
897 That is needed in the .elc file
898 but it is redundant in DOC. So get rid of it here. */
899 saved_string
[length
- 1] = 0;
900 /* Skip the line break. */
901 while (c
== '\n' && c
== '\r')
903 /* Skip the following line. */
904 while (c
!= '\n' && c
!= '\r')
913 read_lisp_symbol (infile
, buffer
);
915 if (! strcmp (buffer
, "defun")
916 || ! strcmp (buffer
, "defmacro")
917 || ! strcmp (buffer
, "defsubst"))
920 read_lisp_symbol (infile
, buffer
);
922 /* Skip the arguments: either "nil" or a list in parens */
925 if (c
== 'n') /* nil */
927 if ((c
= getc (infile
)) != 'i'
928 || (c
= getc (infile
)) != 'l')
930 fprintf (stderr
, "## unparsable arglist in %s (%s)\n",
937 fprintf (stderr
, "## unparsable arglist in %s (%s)\n",
946 /* If the next three characters aren't `dquote bslash newline'
947 then we're not reading a docstring. */
949 if ((c
= getc (infile
)) != '"'
950 || (c
= getc (infile
)) != '\\'
951 || ((c
= getc (infile
)) != '\n' && c
!= '\r'))
954 fprintf (stderr
, "## non-docstring in %s (%s)\n",
961 else if (! strcmp (buffer
, "defvar")
962 || ! strcmp (buffer
, "defconst"))
966 read_lisp_symbol (infile
, buffer
);
968 if (saved_string
== 0)
971 /* Skip until the end of line; remember two previous chars. */
972 while (c
!= '\n' && c
!= '\r' && c
>= 0)
979 /* If two previous characters were " and \,
980 this is a doc string. Otherwise, there is none. */
981 if (c2
!= '"' || c1
!= '\\')
984 fprintf (stderr
, "## non-docstring in %s (%s)\n",
992 else if (! strcmp (buffer
, "custom-declare-variable"))
999 read_lisp_symbol (infile
, buffer
);
1005 "## unparsable name in custom-declare-variable in %s\n",
1009 read_lisp_symbol (infile
, buffer
);
1010 if (strcmp (buffer
, "quote"))
1013 "## unparsable name in custom-declare-variable in %s\n",
1017 read_lisp_symbol (infile
, buffer
);
1022 "## unparsable quoted name in custom-declare-variable in %s\n",
1028 if (saved_string
== 0)
1030 /* Skip to end of line; remember the two previous chars. */
1031 while (c
!= '\n' && c
!= '\r' && c
>= 0)
1038 /* If two previous characters were " and \,
1039 this is a doc string. Otherwise, there is none. */
1040 if (c2
!= '"' || c1
!= '\\')
1043 fprintf (stderr
, "## non-docstring in %s (%s)\n",
1051 else if (! strcmp (buffer
, "fset") || ! strcmp (buffer
, "defalias"))
1053 char c1
= 0, c2
= 0;
1058 read_lisp_symbol (infile
, buffer
);
1063 fprintf (stderr
, "## unparsable name in fset in %s\n",
1067 read_lisp_symbol (infile
, buffer
);
1068 if (strcmp (buffer
, "quote"))
1070 fprintf (stderr
, "## unparsable name in fset in %s\n",
1074 read_lisp_symbol (infile
, buffer
);
1079 "## unparsable quoted name in fset in %s\n",
1085 if (saved_string
== 0)
1087 /* Skip to end of line; remember the two previous chars. */
1088 while (c
!= '\n' && c
!= '\r' && c
>= 0)
1095 /* If two previous characters were " and \,
1096 this is a doc string. Otherwise, there is none. */
1097 if (c2
!= '"' || c1
!= '\\')
1100 fprintf (stderr
, "## non-docstring in %s (%s)\n",
1108 else if (! strcmp (buffer
, "autoload"))
1113 read_lisp_symbol (infile
, buffer
);
1118 fprintf (stderr
, "## unparsable name in autoload in %s\n",
1122 read_lisp_symbol (infile
, buffer
);
1123 if (strcmp (buffer
, "quote"))
1125 fprintf (stderr
, "## unparsable name in autoload in %s\n",
1129 read_lisp_symbol (infile
, buffer
);
1134 "## unparsable quoted name in autoload in %s\n",
1139 skip_white (infile
);
1140 if ((c
= getc (infile
)) != '\"')
1142 fprintf (stderr
, "## autoload of %s unparsable (%s)\n",
1146 read_c_string_or_comment (infile
, 0, 0, 0);
1147 skip_white (infile
);
1149 if (saved_string
== 0)
1151 /* If the next three characters aren't `dquote bslash newline'
1152 then we're not reading a docstring. */
1153 if ((c
= getc (infile
)) != '"'
1154 || (c
= getc (infile
)) != '\\'
1155 || ((c
= getc (infile
)) != '\n' && c
!= '\r'))
1158 fprintf (stderr
, "## non-docstring in %s (%s)\n",
1167 else if (! strcmp (buffer
, "if")
1168 || ! strcmp (buffer
, "byte-code"))
1175 fprintf (stderr
, "## unrecognised top-level form, %s (%s)\n",
1181 /* At this point, we should either use the previous
1182 dynamic doc string in saved_string
1183 or gobble a doc string from the input file.
1185 In the latter case, the opening quote (and leading
1186 backslash-newline) have already been read. */
1188 putc (037, outfile
);
1189 putc (type
, outfile
);
1190 fprintf (outfile
, "%s\n", buffer
);
1193 fputs (saved_string
, outfile
);
1194 /* Don't use one dynamic doc string twice. */
1195 free (saved_string
);
1199 read_c_string_or_comment (infile
, 1, 0, 0);
1205 /* arch-tag: f7203aaf-991a-4238-acb5-601db56f2894
1206 (do not change this comment) */