Started work on string reordering. Just compiled, not yet tested.
[emacs.git] / lib-src / make-docfile.c
blob9b804684a1238ea8ec60b8ca60f7732cb8428a5e
1 /* Generate doc-string file for GNU Emacs from source files.
2 Copyright (C) 1985-1986, 1992-1994, 1997, 1999-2011
3 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
/* The arguments given to this program are all the C and Lisp source files
   of GNU Emacs.  .elc and .el and .c files are allowed.
   A .o file can also be specified; the .c file it was made from is used.
   This helps the makefile pass the correct list of files.
   Option -d DIR means change to DIR before looking for files.

   The results, which go to standard output or to a file
   specified with -a or -o (-a to append, -o to start from nothing),
   are entries containing function or variable names and their documentation.
   Each entry starts with a ^_ character.
   Then comes F for a function or V for a variable.
   Then comes the function or variable name, terminated with a newline.
   Then comes the documentation for that function or variable.
 */
36 #include <config.h>
38 /* defined to be emacs_main, sys_fopen, etc. in config.h */
39 #undef main
40 #undef fopen
41 #undef chdir
43 #include <stdio.h>
44 #include <stdlib.h>
45 #ifdef MSDOS
46 #include <fcntl.h>
47 #endif /* MSDOS */
48 #ifdef WINDOWSNT
49 #include <fcntl.h>
50 #include <direct.h>
51 #endif /* WINDOWSNT */
/* fopen modes used for text and binary input.  Only DOS_NT hosts
   distinguish the two.  */
#ifdef DOS_NT
#define READ_TEXT "rt"
#define READ_BINARY "rb"
#else  /* not DOS_NT */
#define READ_TEXT "r"
#define READ_BINARY "r"
#endif /* not DOS_NT */

/* Character separating directory components in a file name.  */
#ifndef DIRECTORY_SEP
#define DIRECTORY_SEP '/'
#endif

/* Nonzero if _C_ is a directory separator.  */
#ifndef IS_DIRECTORY_SEP
#define IS_DIRECTORY_SEP(_c_) ((_c_) == DIRECTORY_SEP)
#endif

/* Use this to suppress gcc's `...may be used before initialized' warnings. */
#ifdef lint
# define IF_LINT(Code) Code
#else
# define IF_LINT(Code) /* empty */
#endif
76 static int scan_file (char *filename);
77 static int scan_lisp_file (const char *filename, const char *mode);
78 static int scan_c_file (char *filename, const char *mode);
79 static void fatal (const char *s1, const char *s2) NO_RETURN;
80 static void start_globals (void);
81 static void write_globals (void);
83 #ifdef MSDOS
84 /* s/msdos.h defines this as sys_chdir, but we're not linking with the
85 file where that function is defined. */
86 #undef chdir
87 #endif
89 #include <unistd.h>
/* Stdio stream for output to the DOC file.  */
FILE *outfile;

/* Name this program was invoked with.  */
char *progname;

/* Nonzero if this invocation is generating globals.h.  */
int generate_globals;
100 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
102 /* VARARGS1 */
103 static void
104 error (const char *s1, const char *s2)
106 fprintf (stderr, "%s: ", progname);
107 fprintf (stderr, s1, s2);
108 fprintf (stderr, "\n");
/* Print an error message as `error' does, then exit with
   EXIT_FAILURE.  Does not return.  */

/* VARARGS1 */
static void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Like malloc but get fatal error if memory is exhausted.
   SIZE is a size_t so that callers passing a size_t (such as the
   dynamic doc string length) are not silently truncated.  */

static void *
xmalloc (size_t size)
{
  void *result = malloc (size);
  if (result == NULL)
    fatal ("virtual memory exhausted", 0);
  return result;
}
/* Like realloc but get fatal error if memory is exhausted.
   ARG is the block to resize and SIZE its new size in bytes.  */

static void *
xrealloc (void *arg, size_t size)
{
  void *result = realloc (arg, size);
  if (result == NULL)
    fatal ("virtual memory exhausted", 0);
  return result;
}
145 main (int argc, char **argv)
147 int i;
148 int err_count = 0;
149 int first_infile;
151 progname = argv[0];
153 outfile = stdout;
155 /* Don't put CRs in the DOC file. */
156 #ifdef MSDOS
157 _fmode = O_BINARY;
158 #if 0 /* Suspicion is that this causes hanging.
159 So instead we require people to use -o on MSDOS. */
160 (stdout)->_flag &= ~_IOTEXT;
161 _setmode (fileno (stdout), O_BINARY);
162 #endif
163 outfile = 0;
164 #endif /* MSDOS */
165 #ifdef WINDOWSNT
166 _fmode = O_BINARY;
167 _setmode (fileno (stdout), O_BINARY);
168 #endif /* WINDOWSNT */
170 /* If first two args are -o FILE, output to FILE. */
171 i = 1;
172 if (argc > i + 1 && !strcmp (argv[i], "-o"))
174 outfile = fopen (argv[i + 1], "w");
175 i += 2;
177 if (argc > i + 1 && !strcmp (argv[i], "-a"))
179 outfile = fopen (argv[i + 1], "a");
180 i += 2;
182 if (argc > i + 1 && !strcmp (argv[i], "-d"))
184 if (chdir (argv[i + 1]) != 0)
186 perror (argv[i + 1]);
187 return EXIT_FAILURE;
189 i += 2;
191 if (argc > i && !strcmp (argv[i], "-g"))
193 generate_globals = 1;
194 ++i;
197 if (outfile == 0)
198 fatal ("No output file specified", "");
200 if (generate_globals)
201 start_globals ();
203 first_infile = i;
204 for (; i < argc; i++)
206 int j;
207 /* Don't process one file twice. */
208 for (j = first_infile; j < i; j++)
209 if (! strcmp (argv[i], argv[j]))
210 break;
211 if (j == i)
212 err_count += scan_file (argv[i]);
215 if (err_count == 0 && generate_globals)
216 write_globals ();
218 return (err_count > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
221 /* Add a source file name boundary marker in the output file. */
222 static void
223 put_filename (char *filename)
225 char *tmp;
227 for (tmp = filename; *tmp; tmp++)
229 if (IS_DIRECTORY_SEP(*tmp))
230 filename = tmp + 1;
233 putc (037, outfile);
234 putc ('S', outfile);
235 fprintf (outfile, "%s\n", filename);
238 /* Read file FILENAME and output its doc strings to outfile. */
239 /* Return 1 if file is not found, 0 if it is found. */
241 static int
242 scan_file (char *filename)
245 size_t len = strlen (filename);
247 if (!generate_globals)
248 put_filename (filename);
249 if (len > 4 && !strcmp (filename + len - 4, ".elc"))
250 return scan_lisp_file (filename, READ_BINARY);
251 else if (len > 3 && !strcmp (filename + len - 3, ".el"))
252 return scan_lisp_file (filename, READ_TEXT);
253 else
254 return scan_c_file (filename, READ_TEXT);
257 static void
258 start_globals (void)
260 fprintf (outfile, "/* This file was auto-generated by make-docfile. */\n");
261 fprintf (outfile, "/* DO NOT EDIT. */\n");
262 fprintf (outfile, "struct emacs_globals {\n");
/* Scratch buffer: receives the Lisp name read by
   `read_c_string_or_comment' when its PRINTFLAG is negative, and the
   C identifier read by `scan_c_file' in -g mode.  */
static char input_buffer[128];
/* Some state during the execution of `read_c_string_or_comment'.  */
struct rcsoc_state
{
  /* A count of spaces and newlines that have been read, but not output.  */
  unsigned pending_spaces, pending_newlines;

  /* Where we're reading from.  */
  FILE *in_file;

  /* If non-zero, a buffer into which to copy characters.  */
  char *buf_ptr;
  /* If non-zero, a file into which to copy characters.  */
  FILE *out_file;

  /* A keyword we look for at the beginning of lines.  If found, it is
     not copied, and SAW_KEYWORD is set to true.  */
  const char *keyword;
  /* The current point we've reached in an occurrence of KEYWORD in
     the input stream.  */
  const char *cur_keyword_ptr;
  /* Set to true if we saw an occurrence of KEYWORD.  */
  int saw_keyword;
};
291 /* Output CH to the file or buffer in STATE. Any pending newlines or
292 spaces are output first. */
294 static INLINE void
295 put_char (int ch, struct rcsoc_state *state)
297 int out_ch;
300 if (state->pending_newlines > 0)
302 state->pending_newlines--;
303 out_ch = '\n';
305 else if (state->pending_spaces > 0)
307 state->pending_spaces--;
308 out_ch = ' ';
310 else
311 out_ch = ch;
313 if (state->out_file)
314 putc (out_ch, state->out_file);
315 if (state->buf_ptr)
316 *state->buf_ptr++ = out_ch;
318 while (out_ch != ch);
321 /* If in the middle of scanning a keyword, continue scanning with
322 character CH, otherwise output CH to the file or buffer in STATE.
323 Any pending newlines or spaces are output first, as well as any
324 previously scanned characters that were thought to be part of a
325 keyword, but were in fact not. */
327 static void
328 scan_keyword_or_put_char (int ch, struct rcsoc_state *state)
330 if (state->keyword
331 && *state->cur_keyword_ptr == ch
332 && (state->cur_keyword_ptr > state->keyword
333 || state->pending_newlines > 0))
334 /* We might be looking at STATE->keyword at some point.
335 Keep looking until we know for sure. */
337 if (*++state->cur_keyword_ptr == '\0')
338 /* Saw the whole keyword. Set SAW_KEYWORD flag to true. */
340 state->saw_keyword = 1;
342 /* Reset the scanning pointer. */
343 state->cur_keyword_ptr = state->keyword;
345 /* Canonicalize whitespace preceding a usage string. */
346 state->pending_newlines = 2;
347 state->pending_spaces = 0;
349 /* Skip any whitespace between the keyword and the
350 usage string. */
352 ch = getc (state->in_file);
353 while (ch == ' ' || ch == '\n');
355 /* Output the open-paren we just read. */
356 put_char (ch, state);
358 /* Skip the function name and replace it with `fn'. */
360 ch = getc (state->in_file);
361 while (ch != ' ' && ch != ')');
362 put_char ('f', state);
363 put_char ('n', state);
365 /* Put back the last character. */
366 ungetc (ch, state->in_file);
369 else
371 if (state->keyword && state->cur_keyword_ptr > state->keyword)
372 /* We scanned the beginning of a potential usage
373 keyword, but it was a false alarm. Output the
374 part we scanned. */
376 const char *p;
378 for (p = state->keyword; p < state->cur_keyword_ptr; p++)
379 put_char (*p, state);
381 state->cur_keyword_ptr = state->keyword;
384 put_char (ch, state);
389 /* Skip a C string or C-style comment from INFILE, and return the
390 character that follows. COMMENT non-zero means skip a comment. If
391 PRINTFLAG is positive, output string contents to outfile. If it is
392 negative, store contents in buf. Convert escape sequences \n and
393 \t to newline and tab; discard \ followed by newline.
394 If SAW_USAGE is non-zero, then any occurrences of the string `usage:'
395 at the beginning of a line will be removed, and *SAW_USAGE set to
396 true if any were encountered. */
398 static int
399 read_c_string_or_comment (FILE *infile, int printflag, int comment, int *saw_usage)
401 register int c;
402 struct rcsoc_state state;
404 state.in_file = infile;
405 state.buf_ptr = (printflag < 0 ? input_buffer : 0);
406 state.out_file = (printflag > 0 ? outfile : 0);
407 state.pending_spaces = 0;
408 state.pending_newlines = 0;
409 state.keyword = (saw_usage ? "usage:" : 0);
410 state.cur_keyword_ptr = state.keyword;
411 state.saw_keyword = 0;
413 c = getc (infile);
414 if (comment)
415 while (c == '\n' || c == '\r' || c == '\t' || c == ' ')
416 c = getc (infile);
418 while (c != EOF)
420 while (c != EOF && (comment ? c != '*' : c != '"'))
422 if (c == '\\')
424 c = getc (infile);
425 if (c == '\n' || c == '\r')
427 c = getc (infile);
428 continue;
430 if (c == 'n')
431 c = '\n';
432 if (c == 't')
433 c = '\t';
436 if (c == ' ')
437 state.pending_spaces++;
438 else if (c == '\n')
440 state.pending_newlines++;
441 state.pending_spaces = 0;
443 else
444 scan_keyword_or_put_char (c, &state);
446 c = getc (infile);
449 if (c != EOF)
450 c = getc (infile);
452 if (comment)
454 if (c == '/')
456 c = getc (infile);
457 break;
460 scan_keyword_or_put_char ('*', &state);
462 else
464 if (c != '"')
465 break;
467 /* If we had a "", concatenate the two strings. */
468 c = getc (infile);
472 if (printflag < 0)
473 *state.buf_ptr = 0;
475 if (saw_usage)
476 *saw_usage = state.saw_keyword;
478 return c;
/* Write to file OUT the argument names of function FUNC, whose text is in BUF.
   MINARGS and MAXARGS are the minimum and maximum number of arguments.

   The output has the form `(fn ARG1 ARG2 &optional ARG3)': each
   argument name is the last identifier before a `,' or `)', printed
   in upper case with `_' turned into `-'.  `&optional' is written
   before the first argument beyond MINARGS.  An argument list of
   `(void)' yields just `(fn)'.  */

static void
write_c_args (FILE *out, char *func, char *buf, int minargs, int maxargs)
{
  register char *p;
  int in_ident = 0;
  char *ident_start = NULL;
  size_t ident_length = 0;

  fprintf (out, "(fn");

  if (*buf == '(')
    ++buf;

  for (p = buf; *p; p++)
    {
      char c = *p;

      /* Notice when a new identifier starts.  */
      if ((('A' <= c && c <= 'Z')
           || ('a' <= c && c <= 'z')
           || ('0' <= c && c <= '9')
           || c == '_')
          != in_ident)
        {
          if (!in_ident)
            {
              in_ident = 1;
              ident_start = p;
            }
          else
            {
              in_ident = 0;
              ident_length = p - ident_start;
            }
        }

      /* Found the end of an argument, write out the last seen
         identifier.  */
      if (c == ',' || c == ')')
        {
          size_t k;

          if (ident_length == 0)
            {
              error ("empty arg list for `%s' should be (void), not ()", func);
              continue;
            }

          /* Only the exact word `void' means "no arguments".  Comparing
             just IDENT_LENGTH characters would also match its prefixes,
             dropping an argument named `v', `vo' or `voi'.  */
          if (ident_length == 4 && strncmp (ident_start, "void", 4) == 0)
            continue;

          putc (' ', out);

          if (minargs == 0 && maxargs > 0)
            fprintf (out, "&optional ");

          minargs--;
          maxargs--;

          /* In C code, `default' is a reserved word, so we spell it
             `defalt'; unmangle that here.  */
          if (ident_length == 6 && strncmp (ident_start, "defalt", 6) == 0)
            fprintf (out, "DEFAULT");
          else
            for (k = 0; k < ident_length; k++)
              {
                c = ident_start[k];
                if (c >= 'a' && c <= 'z')
                  /* Upcase the letter.  */
                  c += 'A' - 'a';
                else if (c == '_')
                  /* Print underscore as hyphen.  */
                  c = '-';
                putc (c, out);
              }

          /* Forget the identifier just written, so that an empty
             argument following it is reported rather than printed
             from a stale length.  */
          ident_length = 0;
        }
    }

  putc (')', out);
}
/* The types of globals.  `write_globals' emits EMACS_INTEGER as
   `EMACS_INT', BOOLEAN as `int' and LISP_OBJECT as `Lisp_Object';
   INVALID marks a DEFVAR_ form that is not one of those.  */
enum global_type
{
  EMACS_INTEGER,
  BOOLEAN,
  LISP_OBJECT,
  INVALID
};
574 /* A single global. */
575 struct global
577 enum global_type type;
578 char *name;
/* All the variable names we saw while scanning C sources in `-g'
   mode.  GLOBALS has room for NUM_GLOBALS_ALLOCATED entries, of which
   the first NUM_GLOBALS are in use.  */
int num_globals;
int num_globals_allocated;
struct global *globals;
587 static void
588 add_global (enum global_type type, char *name)
590 /* Ignore the one non-symbol that can occur. */
591 if (strcmp (name, "..."))
593 ++num_globals;
595 if (num_globals_allocated == 0)
597 num_globals_allocated = 100;
598 globals = xmalloc (num_globals_allocated * sizeof (struct global));
600 else if (num_globals == num_globals_allocated)
602 num_globals_allocated *= 2;
603 globals = xrealloc (globals,
604 num_globals_allocated * sizeof (struct global));
607 globals[num_globals - 1].type = type;
608 globals[num_globals - 1].name = name;
612 static int
613 compare_globals (const void *a, const void *b)
615 const struct global *ga = a;
616 const struct global *gb = b;
617 return strcmp (ga->name, gb->name);
620 static void
621 write_globals (void)
623 int i;
624 qsort (globals, num_globals, sizeof (struct global), compare_globals);
625 for (i = 0; i < num_globals; ++i)
627 char const *type;
629 switch (globals[i].type)
631 case EMACS_INTEGER:
632 type = "EMACS_INT";
633 break;
634 case BOOLEAN:
635 type = "int";
636 break;
637 case LISP_OBJECT:
638 type = "Lisp_Object";
639 break;
640 default:
641 fatal ("not a recognized DEFVAR_", 0);
644 fprintf (outfile, " %s f_%s;\n", type, globals[i].name);
645 fprintf (outfile, "#define %s globals.f_%s\n",
646 globals[i].name, globals[i].name);
647 while (i + 1 < num_globals
648 && !strcmp (globals[i].name, globals[i + 1].name))
649 ++i;
652 fprintf (outfile, "};\n");
653 fprintf (outfile, "extern struct emacs_globals globals;\n");
657 /* Read through a c file. If a .o file is named,
658 the corresponding .c or .m file is read instead.
659 Looks for DEFUN constructs such as are defined in ../src/lisp.h.
660 Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED. */
662 static int
663 scan_c_file (char *filename, const char *mode)
665 FILE *infile;
666 register int c;
667 register int commas;
668 int minargs, maxargs;
669 int extension = filename[strlen (filename) - 1];
671 if (extension == 'o')
672 filename[strlen (filename) - 1] = 'c';
674 infile = fopen (filename, mode);
676 if (infile == NULL && extension == 'o')
678 /* try .m */
679 filename[strlen (filename) - 1] = 'm';
680 infile = fopen (filename, mode);
681 if (infile == NULL)
682 filename[strlen (filename) - 1] = 'c'; /* don't confuse people */
685 /* No error if non-ex input file */
686 if (infile == NULL)
688 perror (filename);
689 return 0;
692 /* Reset extension to be able to detect duplicate files. */
693 filename[strlen (filename) - 1] = extension;
695 c = '\n';
696 while (!feof (infile))
698 int doc_keyword = 0;
699 int defunflag = 0;
700 int defvarperbufferflag = 0;
701 int defvarflag = 0;
702 enum global_type type = INVALID;
704 if (c != '\n' && c != '\r')
706 c = getc (infile);
707 continue;
709 c = getc (infile);
710 if (c == ' ')
712 while (c == ' ')
713 c = getc (infile);
714 if (c != 'D')
715 continue;
716 c = getc (infile);
717 if (c != 'E')
718 continue;
719 c = getc (infile);
720 if (c != 'F')
721 continue;
722 c = getc (infile);
723 if (c != 'V')
724 continue;
725 c = getc (infile);
726 if (c != 'A')
727 continue;
728 c = getc (infile);
729 if (c != 'R')
730 continue;
731 c = getc (infile);
732 if (c != '_')
733 continue;
735 defvarflag = 1;
737 c = getc (infile);
738 defvarperbufferflag = (c == 'P');
739 if (generate_globals)
741 if (c == 'I')
742 type = EMACS_INTEGER;
743 else if (c == 'L')
744 type = LISP_OBJECT;
745 else if (c == 'B')
746 type = BOOLEAN;
749 c = getc (infile);
750 /* We need to distinguish between DEFVAR_BOOL and
751 DEFVAR_BUFFER_DEFAULTS. */
752 if (generate_globals && type == BOOLEAN && c != 'O')
753 type = INVALID;
755 else if (c == 'D')
757 c = getc (infile);
758 if (c != 'E')
759 continue;
760 c = getc (infile);
761 if (c != 'F')
762 continue;
763 c = getc (infile);
764 defunflag = c == 'U';
766 else continue;
768 if (generate_globals && (!defvarflag || defvarperbufferflag
769 || type == INVALID))
770 continue;
772 while (c != '(')
774 if (c < 0)
775 goto eof;
776 c = getc (infile);
779 /* Lisp variable or function name. */
780 c = getc (infile);
781 if (c != '"')
782 continue;
783 c = read_c_string_or_comment (infile, -1, 0, 0);
785 if (generate_globals)
787 int i = 0;
788 char *name;
790 /* Skip "," and whitespace. */
793 c = getc (infile);
795 while (c == ',' || c == ' ' || c == '\t' || c == '\n' || c == '\r');
797 /* Read in the identifier. */
800 input_buffer[i++] = c;
801 c = getc (infile);
803 while (! (c == ',' || c == ' ' || c == '\t' ||
804 c == '\n' || c == '\r'));
805 input_buffer[i] = '\0';
807 name = xmalloc (i + 1);
808 memcpy (name, input_buffer, i + 1);
809 add_global (type, name);
810 continue;
813 /* DEFVAR_LISP ("name", addr, "doc")
814 DEFVAR_LISP ("name", addr /\* doc *\/)
815 DEFVAR_LISP ("name", addr, doc: /\* doc *\/) */
817 if (defunflag)
818 commas = 5;
819 else if (defvarperbufferflag)
820 commas = 3;
821 else if (defvarflag)
822 commas = 1;
823 else /* For DEFSIMPLE and DEFPRED */
824 commas = 2;
826 while (commas)
828 if (c == ',')
830 commas--;
832 if (defunflag && (commas == 1 || commas == 2))
834 int scanned = 0;
836 c = getc (infile);
837 while (c == ' ' || c == '\n' || c == '\r' || c == '\t');
838 if (c < 0)
839 goto eof;
840 ungetc (c, infile);
841 if (commas == 2) /* pick up minargs */
842 scanned = fscanf (infile, "%d", &minargs);
843 else /* pick up maxargs */
844 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
845 maxargs = -1;
846 else
847 scanned = fscanf (infile, "%d", &maxargs);
848 if (scanned < 0)
849 goto eof;
853 if (c == EOF)
854 goto eof;
855 c = getc (infile);
858 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
859 c = getc (infile);
861 if (c == '"')
862 c = read_c_string_or_comment (infile, 0, 0, 0);
864 while (c != EOF && c != ',' && c != '/')
865 c = getc (infile);
866 if (c == ',')
868 c = getc (infile);
869 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
870 c = getc (infile);
871 while ((c >= 'a' && c <= 'z') || (c >= 'Z' && c <= 'Z'))
872 c = getc (infile);
873 if (c == ':')
875 doc_keyword = 1;
876 c = getc (infile);
877 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
878 c = getc (infile);
882 if (c == '"'
883 || (c == '/'
884 && (c = getc (infile),
885 ungetc (c, infile),
886 c == '*')))
888 int comment = c != '"';
889 int saw_usage;
891 putc (037, outfile);
892 putc (defvarflag ? 'V' : 'F', outfile);
893 fprintf (outfile, "%s\n", input_buffer);
895 if (comment)
896 getc (infile); /* Skip past `*' */
897 c = read_c_string_or_comment (infile, 1, comment, &saw_usage);
899 /* If this is a defun, find the arguments and print them. If
900 this function takes MANY or UNEVALLED args, then the C source
901 won't give the names of the arguments, so we shouldn't bother
902 trying to find them.
904 Various doc-string styles:
905 0: DEFUN (..., "DOC") (args) [!comment]
906 1: DEFUN (..., /\* DOC *\/ (args)) [comment && !doc_keyword]
907 2: DEFUN (..., doc: /\* DOC *\/) (args) [comment && doc_keyword]
909 if (defunflag && maxargs != -1 && !saw_usage)
911 char argbuf[1024], *p = argbuf;
913 if (!comment || doc_keyword)
914 while (c != ')')
916 if (c < 0)
917 goto eof;
918 c = getc (infile);
921 /* Skip into arguments. */
922 while (c != '(')
924 if (c < 0)
925 goto eof;
926 c = getc (infile);
928 /* Copy arguments into ARGBUF. */
929 *p++ = c;
931 *p++ = c = getc (infile);
932 while (c != ')');
933 *p = '\0';
934 /* Output them. */
935 fprintf (outfile, "\n\n");
936 write_c_args (outfile, input_buffer, argbuf, minargs, maxargs);
938 else if (defunflag && maxargs == -1 && !saw_usage)
939 /* The DOC should provide the usage form. */
940 fprintf (stderr, "Missing `usage' for function `%s'.\n",
941 input_buffer);
944 eof:
945 fclose (infile);
946 return 0;
/* Read a file of Lisp code, compiled or interpreted.
 Looks for
  (defun NAME ARGS DOCSTRING ...)
  (defmacro NAME ARGS DOCSTRING ...)
  (defsubst NAME ARGS DOCSTRING ...)
  (autoload (quote NAME) FILE DOCSTRING ...)
  (defvar NAME VALUE DOCSTRING)
  (defconst NAME VALUE DOCSTRING)
  (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
  (fset (quote NAME) #[... DOCSTRING ...])
  (defalias (quote NAME) #[... DOCSTRING ...])
  (custom-declare-variable (quote NAME) VALUE DOCSTRING ...)
 starting in column zero.
 (quote NAME) may appear as 'NAME as well.

 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
 When we find that, we save it for the following defining-form,
 and we use that instead of reading a doc string within that defining-form.

 For defvar, defconst, and fset we skip to the docstring with a kludgy
 formatting convention: all docstrings must appear on the same line as the
 initial open-paren (the one in column zero) and must contain a backslash
 and a newline immediately after the initial double-quote.  No newlines
 must appear between the beginning of the form and the first double-quote.
 For defun, defmacro, and autoload, we know how to skip over the
 arglist, but the doc string must still have a backslash and newline
 immediately after the double quote.
 The only source files that must follow this convention are preloaded
 uncompiled ones like loaddefs.el and bindings.el; aside
 from that, it is always the .elc file that we look at, and they are no
 problem because byte-compiler output follows this convention.
 The NAME and DOCSTRING are output.
 NAME is preceded by `F' for a function or `V' for a variable.
 An entry is output only if DOCSTRING has \ newline just after the opening "
 */
/* Advance INFILE past any spaces, tabs, newlines and carriage
   returns, leaving it positioned at the first other character.

   The character is kept in an int so that EOF stays distinct from
   every byte value.  Held in a plain char, a 0xFF byte becomes equal
   to EOF where char is signed and is then lost, because ungetc
   ignores EOF; where char is unsigned, EOF becomes 0xFF and a byte
   that was never in the file is pushed back.  */
static void
skip_white (FILE *infile)
{
  int c;

  do
    c = getc (infile);
  while (c == ' ' || c == '\t' || c == '\n' || c == '\r');

  /* At end of file this pushes back EOF, which ungetc ignores.  */
  ungetc (c, infile);
}
/* Read a Lisp symbol from INFILE into BUFFER as a null-terminated
   string, skipping whitespace before and after it.  The symbol ends
   at whitespace, `(' or `)', or end of file; the terminating
   character is left unread.  A character preceded by a backslash is
   taken literally.  A message is printed if no symbol was found.

   BUFFER is taken to hold BUFSIZ bytes, as the only buffer passed in
   this file does; characters that do not fit are read and
   discarded.  */
static void
read_lisp_symbol (FILE *infile, char *buffer)
{
  int c;
  char *fillp = buffer;
  /* Last position that may receive the terminating null.  */
  char *limit = buffer + BUFSIZ - 1;

  skip_white (infile);
  while (1)
    {
      c = getc (infile);
      if (c == '\\')
        {
          /* Take the next character literally.  It is stored at the
             current position; storing it one position further on
             would leave a garbage byte inside the symbol.  */
          c = getc (infile);
          if (c == EOF)
            break;
        }
      else if (c == ' ' || c == '\t' || c == '\n' || c == '\r'
               || c == '(' || c == ')' || c == EOF)
        {
          /* End of the symbol.  (Pushing back EOF is a no-op.)  */
          ungetc (c, infile);
          break;
        }

      if (fillp < limit)
        *fillp++ = c;
    }
  *fillp = 0;

  if (! buffer[0])
    fprintf (stderr, "## expected a symbol, got '%c'\n", c);

  skip_white (infile);
}
1022 static int
1023 scan_lisp_file (const char *filename, const char *mode)
1025 FILE *infile;
1026 register int c;
1027 char *saved_string = 0;
1029 if (generate_globals)
1030 fatal ("scanning lisp file when -g specified", 0);
1032 infile = fopen (filename, mode);
1033 if (infile == NULL)
1035 perror (filename);
1036 return 0; /* No error */
1039 c = '\n';
1040 while (!feof (infile))
1042 char buffer[BUFSIZ];
1043 char type;
1045 /* If not at end of line, skip till we get to one. */
1046 if (c != '\n' && c != '\r')
1048 c = getc (infile);
1049 continue;
1051 /* Skip the line break. */
1052 while (c == '\n' || c == '\r')
1053 c = getc (infile);
1054 /* Detect a dynamic doc string and save it for the next expression. */
1055 if (c == '#')
1057 c = getc (infile);
1058 if (c == '@')
1060 size_t length = 0;
1061 size_t i;
1063 /* Read the length. */
1064 while ((c = getc (infile),
1065 c >= '0' && c <= '9'))
1067 length *= 10;
1068 length += c - '0';
1071 if (length <= 1)
1072 fatal ("invalid dynamic doc string length", "");
1074 if (c != ' ')
1075 fatal ("space not found after dynamic doc string length", "");
1077 /* The next character is a space that is counted in the length
1078 but not part of the doc string.
1079 We already read it, so just ignore it. */
1080 length--;
1082 /* Read in the contents. */
1083 free (saved_string);
1084 saved_string = (char *) xmalloc (length);
1085 for (i = 0; i < length; i++)
1086 saved_string[i] = getc (infile);
1087 /* The last character is a ^_.
1088 That is needed in the .elc file
1089 but it is redundant in DOC. So get rid of it here. */
1090 saved_string[length - 1] = 0;
1091 /* Skip the line break. */
1092 while (c == '\n' || c == '\r')
1093 c = getc (infile);
1094 /* Skip the following line. */
1095 while (c != '\n' && c != '\r')
1096 c = getc (infile);
1098 continue;
1101 if (c != '(')
1102 continue;
1104 read_lisp_symbol (infile, buffer);
1106 if (! strcmp (buffer, "defun")
1107 || ! strcmp (buffer, "defmacro")
1108 || ! strcmp (buffer, "defsubst"))
1110 type = 'F';
1111 read_lisp_symbol (infile, buffer);
1113 /* Skip the arguments: either "nil" or a list in parens */
1115 c = getc (infile);
1116 if (c == 'n') /* nil */
1118 if ((c = getc (infile)) != 'i'
1119 || (c = getc (infile)) != 'l')
1121 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1122 buffer, filename);
1123 continue;
1126 else if (c != '(')
1128 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1129 buffer, filename);
1130 continue;
1132 else
1133 while (c != ')')
1134 c = getc (infile);
1135 skip_white (infile);
1137 /* If the next three characters aren't `dquote bslash newline'
1138 then we're not reading a docstring.
1140 if ((c = getc (infile)) != '"'
1141 || (c = getc (infile)) != '\\'
1142 || ((c = getc (infile)) != '\n' && c != '\r'))
1144 #ifdef DEBUG
1145 fprintf (stderr, "## non-docstring in %s (%s)\n",
1146 buffer, filename);
1147 #endif
1148 continue;
1152 else if (! strcmp (buffer, "defvar")
1153 || ! strcmp (buffer, "defconst"))
1155 char c1 = 0, c2 = 0;
1156 type = 'V';
1157 read_lisp_symbol (infile, buffer);
1159 if (saved_string == 0)
1162 /* Skip until the end of line; remember two previous chars. */
1163 while (c != '\n' && c != '\r' && c >= 0)
1165 c2 = c1;
1166 c1 = c;
1167 c = getc (infile);
1170 /* If two previous characters were " and \,
1171 this is a doc string. Otherwise, there is none. */
1172 if (c2 != '"' || c1 != '\\')
1174 #ifdef DEBUG
1175 fprintf (stderr, "## non-docstring in %s (%s)\n",
1176 buffer, filename);
1177 #endif
1178 continue;
1183 else if (! strcmp (buffer, "custom-declare-variable")
1184 || ! strcmp (buffer, "defvaralias")
1187 char c1 = 0, c2 = 0;
1188 type = 'V';
1190 c = getc (infile);
1191 if (c == '\'')
1192 read_lisp_symbol (infile, buffer);
1193 else
1195 if (c != '(')
1197 fprintf (stderr,
1198 "## unparsable name in custom-declare-variable in %s\n",
1199 filename);
1200 continue;
1202 read_lisp_symbol (infile, buffer);
1203 if (strcmp (buffer, "quote"))
1205 fprintf (stderr,
1206 "## unparsable name in custom-declare-variable in %s\n",
1207 filename);
1208 continue;
1210 read_lisp_symbol (infile, buffer);
1211 c = getc (infile);
1212 if (c != ')')
1214 fprintf (stderr,
1215 "## unparsable quoted name in custom-declare-variable in %s\n",
1216 filename);
1217 continue;
1221 if (saved_string == 0)
1223 /* Skip to end of line; remember the two previous chars. */
1224 while (c != '\n' && c != '\r' && c >= 0)
1226 c2 = c1;
1227 c1 = c;
1228 c = getc (infile);
1231 /* If two previous characters were " and \,
1232 this is a doc string. Otherwise, there is none. */
1233 if (c2 != '"' || c1 != '\\')
1235 #ifdef DEBUG
1236 fprintf (stderr, "## non-docstring in %s (%s)\n",
1237 buffer, filename);
1238 #endif
1239 continue;
1244 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
1246 char c1 = 0, c2 = 0;
1247 type = 'F';
1249 c = getc (infile);
1250 if (c == '\'')
1251 read_lisp_symbol (infile, buffer);
1252 else
1254 if (c != '(')
1256 fprintf (stderr, "## unparsable name in fset in %s\n",
1257 filename);
1258 continue;
1260 read_lisp_symbol (infile, buffer);
1261 if (strcmp (buffer, "quote"))
1263 fprintf (stderr, "## unparsable name in fset in %s\n",
1264 filename);
1265 continue;
1267 read_lisp_symbol (infile, buffer);
1268 c = getc (infile);
1269 if (c != ')')
1271 fprintf (stderr,
1272 "## unparsable quoted name in fset in %s\n",
1273 filename);
1274 continue;
1278 if (saved_string == 0)
1280 /* Skip to end of line; remember the two previous chars. */
1281 while (c != '\n' && c != '\r' && c >= 0)
1283 c2 = c1;
1284 c1 = c;
1285 c = getc (infile);
1288 /* If two previous characters were " and \,
1289 this is a doc string. Otherwise, there is none. */
1290 if (c2 != '"' || c1 != '\\')
1292 #ifdef DEBUG
1293 fprintf (stderr, "## non-docstring in %s (%s)\n",
1294 buffer, filename);
1295 #endif
1296 continue;
1301 else if (! strcmp (buffer, "autoload"))
1303 type = 'F';
1304 c = getc (infile);
1305 if (c == '\'')
1306 read_lisp_symbol (infile, buffer);
1307 else
1309 if (c != '(')
1311 fprintf (stderr, "## unparsable name in autoload in %s\n",
1312 filename);
1313 continue;
1315 read_lisp_symbol (infile, buffer);
1316 if (strcmp (buffer, "quote"))
1318 fprintf (stderr, "## unparsable name in autoload in %s\n",
1319 filename);
1320 continue;
1322 read_lisp_symbol (infile, buffer);
1323 c = getc (infile);
1324 if (c != ')')
1326 fprintf (stderr,
1327 "## unparsable quoted name in autoload in %s\n",
1328 filename);
1329 continue;
1332 skip_white (infile);
1333 if ((c = getc (infile)) != '\"')
1335 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
1336 buffer, filename);
1337 continue;
1339 read_c_string_or_comment (infile, 0, 0, 0);
1340 skip_white (infile);
1342 if (saved_string == 0)
1344 /* If the next three characters aren't `dquote bslash newline'
1345 then we're not reading a docstring. */
1346 if ((c = getc (infile)) != '"'
1347 || (c = getc (infile)) != '\\'
1348 || ((c = getc (infile)) != '\n' && c != '\r'))
1350 #ifdef DEBUG
1351 fprintf (stderr, "## non-docstring in %s (%s)\n",
1352 buffer, filename);
1353 #endif
1354 continue;
1359 #ifdef DEBUG
1360 else if (! strcmp (buffer, "if")
1361 || ! strcmp (buffer, "byte-code"))
1363 #endif
1365 else
1367 #ifdef DEBUG
1368 fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
1369 buffer, filename);
1370 #endif
1371 continue;
1374 /* At this point, we should either use the previous
1375 dynamic doc string in saved_string
1376 or gobble a doc string from the input file.
1378 In the latter case, the opening quote (and leading
1379 backslash-newline) have already been read. */
1381 putc (037, outfile);
1382 putc (type, outfile);
1383 fprintf (outfile, "%s\n", buffer);
1384 if (saved_string)
1386 fputs (saved_string, outfile);
1387 /* Don't use one dynamic doc string twice. */
1388 free (saved_string);
1389 saved_string = 0;
1391 else
1392 read_c_string_or_comment (infile, 1, 0, 0);
1394 fclose (infile);
1395 return 0;
1399 /* make-docfile.c ends here */