; * etc/NEWS: Fix confused documentation markers.
[emacs.git] / lib-src / make-docfile.c
blob23728e7251e6f2b556c7b47e29c2ea88dfb623c0
/* Generate doc-string file for GNU Emacs from source files.

Copyright (C) 1985-1986, 1992-1994, 1997, 1999-2018 Free Software
Foundation, Inc.

This file is part of GNU Emacs.

GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or (at
your option) any later version.

GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
/* The arguments given to this program are all the C and Lisp source files
   of GNU Emacs. .elc and .el and .c files are allowed.
   A .o file can also be specified; the .c file it was made from is used.
   This helps the makefile pass the correct list of files.
   Option -d DIR means change to DIR before looking for files.

   The results, which go to standard output or to a file
   specified with -a or -o (-a to append, -o to start from nothing),
   are entries containing function or variable names and their documentation.
   Each entry starts with a ^_ character.
   Then comes F for a function or V for a variable.
   Then comes the function or variable name, terminated with a newline.
   Then comes the documentation for that function or variable.
 */
37 #include <config.h>
39 #include <stdarg.h>
40 #include <stddef.h>
41 #include <stdint.h>
42 #include <stdlib.h>
43 #include <string.h>
45 #include <binary-io.h>
46 #include <c-ctype.h>
47 #include <intprops.h>
48 #include <min-max.h>
49 #include <unlocked-io.h>
51 #ifdef WINDOWSNT
52 /* Defined to be sys_fopen in ms-w32.h, but only #ifdef emacs, so this
53 is really just insurance. */
54 #undef fopen
55 #include <direct.h>
56 #endif /* WINDOWSNT */
58 #ifdef DOS_NT
59 /* Defined to be sys_chdir in ms-w32.h, but only #ifdef emacs, so this
60 is really just insurance.
62 Similarly, msdos defines this as sys_chdir, but we're not linking with the
63 file where that function is defined. */
64 #undef chdir
65 #define IS_SLASH(c) ((c) == '/' || (c) == '\\' || (c) == ':')
66 #else /* not DOS_NT */
67 #define IS_SLASH(c) ((c) == '/')
68 #endif /* not DOS_NT */
/* Forward declarations for functions that are used before they are
   defined in this file.  */
static void scan_file (char *filename);
static void scan_lisp_file (const char *filename, const char *mode);
static void scan_c_file (char *filename, const char *mode);
static void scan_c_stream (FILE *infile);
static void start_globals (void);
static void write_globals (void);
77 #include <unistd.h>
/* Name this program was invoked with.  */
static char *progname;

/* True if this invocation is generating globals.h.  */
static bool generate_globals;
85 /* Print error message. Args are like vprintf. */
87 static void ATTRIBUTE_FORMAT_PRINTF (1, 0)
88 verror (char const *m, va_list ap)
90 fprintf (stderr, "%s: ", progname);
91 vfprintf (stderr, m, ap);
92 fprintf (stderr, "\n");
95 /* Print error message. Args are like printf. */
97 static void ATTRIBUTE_FORMAT_PRINTF (1, 2)
98 error (char const *m, ...)
100 va_list ap;
101 va_start (ap, m);
102 verror (m, ap);
103 va_end (ap);
106 /* Print error message and exit. Args are like printf. */
108 static _Noreturn void ATTRIBUTE_FORMAT_PRINTF (1, 2)
109 fatal (char const *m, ...)
111 va_list ap;
112 va_start (ap, m);
113 verror (m, ap);
114 va_end (ap);
115 exit (EXIT_FAILURE);
/* Report that memory allocation failed and exit via fatal.  */

static _Noreturn void
memory_exhausted (void)
{
  fatal ("virtual memory exhausted");
}
124 /* Like malloc but get fatal error if memory is exhausted. */
126 static void * ATTRIBUTE_MALLOC
127 xmalloc (ptrdiff_t size)
129 void *result = malloc (size);
130 if (result == NULL)
131 memory_exhausted ();
132 return result;
/* Like realloc but get fatal error if memory is exhausted.
   ARG is the block to resize and SIZE the new size in bytes; a null
   result from realloc is treated as exhaustion.  */

static void *
xrealloc (void *arg, ptrdiff_t size)
{
  void *result = realloc (arg, size);
  if (result == NULL)
    memory_exhausted ();
  return result;
}
148 main (int argc, char **argv)
150 int i;
152 progname = argv[0];
154 /* If first two args are -o FILE, output to FILE. */
155 i = 1;
156 if (argc > i + 1 && !strcmp (argv[i], "-o"))
158 if (! freopen (argv[i + 1], "w", stdout))
160 perror (argv[i + 1]);
161 return EXIT_FAILURE;
163 i += 2;
165 if (argc > i + 1 && !strcmp (argv[i], "-a"))
167 if (! freopen (argv[i + 1], "a", stdout))
169 perror (argv[i + 1]);
170 return EXIT_FAILURE;
172 i += 2;
174 if (argc > i + 1 && !strcmp (argv[i], "-d"))
176 if (chdir (argv[i + 1]) != 0)
178 perror (argv[i + 1]);
179 return EXIT_FAILURE;
181 i += 2;
183 if (argc > i && !strcmp (argv[i], "-g"))
185 generate_globals = true;
186 ++i;
189 set_binary_mode (fileno (stdout), O_BINARY);
191 if (generate_globals)
192 start_globals ();
194 if (argc <= i)
195 scan_c_stream (stdin);
196 else
198 int first_infile = i;
199 for (; i < argc; i++)
201 int j;
202 /* Don't process one file twice. */
203 for (j = first_infile; j < i; j++)
204 if (strcmp (argv[i], argv[j]) == 0)
205 break;
206 if (j == i)
207 scan_file (argv[i]);
211 if (generate_globals)
212 write_globals ();
214 if (ferror (stdout) || fclose (stdout) != 0)
215 fatal ("write error");
217 return EXIT_SUCCESS;
/* Add a source file name boundary marker in the output file.
   Only the part of FILENAME after its last directory separator is
   written, as "^_S<name>" followed by a newline.  */
static void
put_filename (char *filename)
{
  char *tmp;

  for (tmp = filename; *tmp; tmp++)
    {
      if (IS_DIRECTORY_SEP (*tmp))
        filename = tmp + 1;
    }

  printf ("\037S%s\n", filename);
}
235 /* Read file FILENAME and output its doc strings to stdout.
236 Return true if file is found, false otherwise. */
238 static void
239 scan_file (char *filename)
241 ptrdiff_t len = strlen (filename);
243 if (!generate_globals)
244 put_filename (filename);
245 if (len > 4 && !strcmp (filename + len - 4, ".elc"))
246 scan_lisp_file (filename, "rb");
247 else if (len > 3 && !strcmp (filename + len - 3, ".el"))
248 scan_lisp_file (filename, "r");
249 else
250 scan_c_file (filename, "r");
/* Write the opening lines of the generated globals text to stdout:
   two banner comments and the start of `struct emacs_globals'.  */

static void
start_globals (void)
{
  puts ("/* This file was auto-generated by make-docfile. */");
  puts ("/* DO NOT EDIT. */");
  puts ("struct emacs_globals {");
}
/* Scratch buffer shared by the C scanner: it receives names read by
   read_c_string_or_comment and identifiers read by scan_c_stream.  */
static char input_buffer[128];
/* Some state during the execution of `read_c_string_or_comment'.  */
struct rcsoc_state
{
  /* A count of spaces and newlines that have been read, but not output.  */
  intmax_t pending_spaces, pending_newlines;

  /* Where we're reading from.  */
  FILE *in_file;

  /* If non-zero, a buffer into which to copy characters.  */
  char *buf_ptr;
  /* If non-zero, a file into which to copy characters.  */
  FILE *out_file;

  /* A keyword we look for at the beginning of lines.  If found, it is
     not copied, and SAW_KEYWORD is set to true.  */
  const char *keyword;
  /* The current point we've reached in an occurrence of KEYWORD in
     the input stream.  */
  const char *cur_keyword_ptr;
  /* Set to true if we saw an occurrence of KEYWORD.  */
  bool saw_keyword;
};
287 /* Output CH to the file or buffer in STATE. Any pending newlines or
288 spaces are output first. */
290 static void
291 put_char (char ch, struct rcsoc_state *state)
293 char out_ch;
296 if (state->pending_newlines > 0)
298 state->pending_newlines--;
299 out_ch = '\n';
301 else if (state->pending_spaces > 0)
303 state->pending_spaces--;
304 out_ch = ' ';
306 else
307 out_ch = ch;
309 if (state->out_file)
310 putc (out_ch, state->out_file);
311 if (state->buf_ptr)
312 *state->buf_ptr++ = out_ch;
314 while (out_ch != ch);
317 /* If in the middle of scanning a keyword, continue scanning with
318 character CH, otherwise output CH to the file or buffer in STATE.
319 Any pending newlines or spaces are output first, as well as any
320 previously scanned characters that were thought to be part of a
321 keyword, but were in fact not. */
323 static void
324 scan_keyword_or_put_char (char ch, struct rcsoc_state *state)
326 if (state->keyword
327 && *state->cur_keyword_ptr == ch
328 && (state->cur_keyword_ptr > state->keyword
329 || state->pending_newlines > 0))
330 /* We might be looking at STATE->keyword at some point.
331 Keep looking until we know for sure. */
333 if (*++state->cur_keyword_ptr == '\0')
334 /* Saw the whole keyword. Set SAW_KEYWORD flag to true. */
336 state->saw_keyword = true;
338 /* Reset the scanning pointer. */
339 state->cur_keyword_ptr = state->keyword;
341 /* Canonicalize whitespace preceding a usage string. */
342 state->pending_newlines = 2;
343 state->pending_spaces = 0;
345 /* Skip any spaces and newlines between the keyword and the
346 usage string. */
347 int c;
349 c = getc (state->in_file);
350 while (c == ' ' || c == '\n');
352 /* Output the open-paren we just read. */
353 if (c != '(')
354 fatal ("Missing '(' after keyword");
355 put_char (c, state);
357 /* Skip the function name and replace it with `fn'. */
360 c = getc (state->in_file);
361 if (c == EOF)
362 fatal ("Unexpected EOF after keyword");
364 while (c != ' ' && c != ')');
366 put_char ('f', state);
367 put_char ('n', state);
369 /* Put back the last character. */
370 ungetc (c, state->in_file);
373 else
375 if (state->keyword && state->cur_keyword_ptr > state->keyword)
376 /* We scanned the beginning of a potential usage
377 keyword, but it was a false alarm. Output the
378 part we scanned. */
380 const char *p;
382 for (p = state->keyword; p < state->cur_keyword_ptr; p++)
383 put_char (*p, state);
385 state->cur_keyword_ptr = state->keyword;
388 put_char (ch, state);
393 /* Skip a C string or C-style comment from INFILE, and return the
394 byte that follows, or EOF. COMMENT means skip a comment. If
395 PRINTFLAG is positive, output string contents to stdout. If it is
396 negative, store contents in buf. Convert escape sequences \n and
397 \t to newline and tab; discard \ followed by newline.
398 If SAW_USAGE is non-null, then any occurrences of the string "usage:"
399 at the beginning of a line will be removed, and *SAW_USAGE set to
400 true if any were encountered. */
402 static int
403 read_c_string_or_comment (FILE *infile, int printflag, bool comment,
404 bool *saw_usage)
406 int c;
407 struct rcsoc_state state;
409 state.in_file = infile;
410 state.buf_ptr = (printflag < 0 ? input_buffer : 0);
411 state.out_file = (printflag > 0 ? stdout : 0);
412 state.pending_spaces = 0;
413 state.pending_newlines = 0;
414 state.keyword = (saw_usage ? "usage:" : 0);
415 state.cur_keyword_ptr = state.keyword;
416 state.saw_keyword = false;
418 c = getc (infile);
419 if (comment)
420 while (c_isspace (c))
421 c = getc (infile);
423 while (c != EOF)
425 while (c != EOF && (comment ? c != '*' : c != '"'))
427 if (c == '\\')
429 c = getc (infile);
430 switch (c)
432 case '\n': case '\r':
433 c = getc (infile);
434 continue;
435 case 'n': c = '\n'; break;
436 case 't': c = '\t'; break;
440 if (c == ' ')
441 state.pending_spaces++;
442 else if (c == '\n')
444 state.pending_newlines++;
445 state.pending_spaces = 0;
447 else
448 scan_keyword_or_put_char (c, &state);
450 c = getc (infile);
453 if (c != EOF)
454 c = getc (infile);
456 if (comment)
458 if (c == '/')
460 c = getc (infile);
461 break;
464 scan_keyword_or_put_char ('*', &state);
466 else
468 if (c != '"')
469 break;
471 /* If we had a "", concatenate the two strings. */
472 c = getc (infile);
476 if (printflag < 0)
477 *state.buf_ptr = 0;
479 if (saw_usage)
480 *saw_usage = state.saw_keyword;
482 return c;
487 /* Write to stdout the argument names of function FUNC, whose text is in BUF.
488 MINARGS and MAXARGS are the minimum and maximum number of arguments. */
490 static void
491 write_c_args (char *func, char *buf, int minargs, int maxargs)
493 char *p;
494 bool in_ident = false;
495 char *ident_start UNINIT;
496 ptrdiff_t ident_length = 0;
498 fputs ("(fn", stdout);
500 if (*buf == '(')
501 ++buf;
503 for (p = buf; *p; p++)
505 char c = *p;
507 /* Notice when a new identifier starts. */
508 if ((c_isalnum (c) || c == '_')
509 != in_ident)
511 if (!in_ident)
513 in_ident = true;
514 ident_start = p;
516 else
518 in_ident = false;
519 ident_length = p - ident_start;
523 /* Found the end of an argument, write out the last seen
524 identifier. */
525 if (c == ',' || c == ')')
527 if (ident_length == 0)
529 error ("empty arg list for '%s' should be (void), not ()", func);
530 continue;
533 if (strncmp (ident_start, "void", ident_length) == 0)
534 continue;
536 putchar (' ');
538 if (minargs == 0 && maxargs > 0)
539 fputs ("&optional ", stdout);
541 minargs--;
542 maxargs--;
544 /* In C code, `default' is a reserved word, so we spell it
545 `defalt'; demangle that here. */
546 if (ident_length == 6 && memcmp (ident_start, "defalt", 6) == 0)
547 fputs ("DEFAULT", stdout);
548 else
549 while (ident_length-- > 0)
551 c = c_toupper (*ident_start++);
552 if (c == '_')
553 /* Print underscore as hyphen. */
554 c = '-';
555 putchar (c);
560 putchar (')');
/* The types of globals.  These are sorted roughly in decreasing alignment
   order to avoid allocation gaps, except that symbols and functions
   are last.  compare_globals orders entries by these values first.  */
enum global_type
{
  INVALID,
  LISP_OBJECT,
  EMACS_INTEGER,
  BOOLEAN,
  SYMBOL,
  FUNCTION
};
576 /* A single global. */
577 struct global
579 enum global_type type;
580 char *name;
581 int flags;
582 union
584 int value;
585 char const *svalue;
586 } v;
/* Bit values for FLAGS field from the above.  Applied for DEFUNs only.  */
enum { DEFUN_noreturn = 1, DEFUN_const = 2, DEFUN_noinline = 4 };

/* All the variable names we saw while scanning C sources in `-g'
   mode.  GLOBALS has room for NUM_GLOBALS_ALLOCATED entries, of which
   the first NUM_GLOBALS are in use.  */
static ptrdiff_t num_globals;
static ptrdiff_t num_globals_allocated;
static struct global *globals;
598 static struct global *
599 add_global (enum global_type type, char const *name, int value,
600 char const *svalue)
602 /* Ignore the one non-symbol that can occur. */
603 if (strcmp (name, "..."))
605 if (num_globals == num_globals_allocated)
607 ptrdiff_t num_globals_max = (min (PTRDIFF_MAX, SIZE_MAX)
608 / sizeof *globals);
609 if (num_globals_allocated == num_globals_max)
610 memory_exhausted ();
611 if (num_globals_allocated < num_globals_max / 2)
612 num_globals_allocated = 2 * num_globals_allocated + 1;
613 else
614 num_globals_allocated = num_globals_max;
615 globals = xrealloc (globals, num_globals_allocated * sizeof *globals);
618 ++num_globals;
620 ptrdiff_t namesize = strlen (name) + 1;
621 char *buf = xmalloc (namesize + (svalue ? strlen (svalue) + 1 : 0));
622 globals[num_globals - 1].type = type;
623 globals[num_globals - 1].name = strcpy (buf, name);
624 if (svalue)
625 globals[num_globals - 1].v.svalue = strcpy (buf + namesize, svalue);
626 else
627 globals[num_globals - 1].v.value = value;
628 globals[num_globals - 1].flags = 0;
629 return globals + num_globals - 1;
631 return NULL;
634 static int
635 compare_globals (const void *a, const void *b)
637 const struct global *ga = a;
638 const struct global *gb = b;
640 if (ga->type != gb->type)
641 return ga->type - gb->type;
643 /* Consider "nil" to be the least, so that iQnil is zero. That
644 way, Qnil's internal representation is zero, which is a bit faster. */
645 if (ga->type == SYMBOL)
647 bool a_nil = strcmp (ga->name, "Qnil") == 0;
648 bool b_nil = strcmp (gb->name, "Qnil") == 0;
649 if (a_nil | b_nil)
650 return b_nil - a_nil;
653 return strcmp (ga->name, gb->name);
/* Write to stdout the text that closes `struct emacs_globals',
   followed by the declarations of `globals' and of a `lispsym' array
   with NUM_SYMBOLS elements.  */

static void
close_emacs_globals (ptrdiff_t num_symbols)
{
  printf (("};\n"
           "extern struct emacs_globals globals;\n"
           "\n"
           "#ifndef DEFINE_SYMBOLS\n"
           "extern\n"
           "#endif\n"
           "struct Lisp_Symbol lispsym[%td];\n"),
          num_symbols);
}
669 static void
670 write_globals (void)
672 ptrdiff_t i, j;
673 bool seen_defun = false;
674 ptrdiff_t symnum = 0;
675 ptrdiff_t num_symbols = 0;
676 qsort (globals, num_globals, sizeof (struct global), compare_globals);
678 j = 0;
679 for (i = 0; i < num_globals; i++)
681 while (i + 1 < num_globals
682 && strcmp (globals[i].name, globals[i + 1].name) == 0)
684 if (globals[i].type == FUNCTION
685 && globals[i].v.value != globals[i + 1].v.value)
686 error ("function '%s' defined twice with differing signatures",
687 globals[i].name);
688 free (globals[i].name);
689 i++;
691 num_symbols += globals[i].type == SYMBOL;
692 globals[j++] = globals[i];
694 num_globals = j;
696 for (i = 0; i < num_globals; ++i)
698 char const *type = 0;
700 switch (globals[i].type)
702 case EMACS_INTEGER:
703 type = "EMACS_INT";
704 break;
705 case BOOLEAN:
706 type = "bool";
707 break;
708 case LISP_OBJECT:
709 type = "Lisp_Object";
710 break;
711 case SYMBOL:
712 case FUNCTION:
713 if (!seen_defun)
715 close_emacs_globals (num_symbols);
716 putchar ('\n');
717 seen_defun = true;
719 break;
720 default:
721 fatal ("not a recognized DEFVAR_");
724 if (type)
726 printf (" %s f_%s;\n", type, globals[i].name);
727 printf ("#define %s globals.f_%s\n",
728 globals[i].name, globals[i].name);
730 else if (globals[i].type == SYMBOL)
731 printf (("#define i%s %td\n"
732 "DEFINE_LISP_SYMBOL (%s)\n"),
733 globals[i].name, symnum++, globals[i].name);
734 else
736 if (globals[i].flags & DEFUN_noreturn)
737 fputs ("_Noreturn ", stdout);
738 if (globals[i].flags & DEFUN_noinline)
739 fputs ("NO_INLINE ", stdout);
741 printf ("EXFUN (%s, ", globals[i].name);
742 if (globals[i].v.value == -1)
743 fputs ("MANY", stdout);
744 else if (globals[i].v.value == -2)
745 fputs ("UNEVALLED", stdout);
746 else
747 printf ("%d", globals[i].v.value);
748 putchar (')');
750 if (globals[i].flags & DEFUN_const)
751 fputs (" ATTRIBUTE_CONST", stdout);
753 puts (";");
757 if (!seen_defun)
758 close_emacs_globals (num_symbols);
760 puts ("#ifdef DEFINE_SYMBOLS");
761 puts ("static char const *const defsym_name[] = {");
762 for (ptrdiff_t i = 0; i < num_globals; i++)
763 if (globals[i].type == SYMBOL)
764 printf ("\t\"%s\",\n", globals[i].v.svalue);
765 puts ("};");
766 puts ("#endif");
768 puts ("#define Qnil builtin_lisp_symbol (0)");
769 puts ("#if DEFINE_NON_NIL_Q_SYMBOL_MACROS");
770 num_symbols = 0;
771 for (ptrdiff_t i = 0; i < num_globals; i++)
772 if (globals[i].type == SYMBOL && num_symbols++ != 0)
773 printf ("# define %s builtin_lisp_symbol (%td)\n",
774 globals[i].name, num_symbols - 1);
775 puts ("#endif");
/* Read through a c file.  If a .o file is named,
   the corresponding .c or .m file is read instead.
   Looks for DEFUN constructs such as are defined in ../src/lisp.h.
   Accepts any word starting DEF... so it finds DEFSIMPLE and DEFPRED.

   FILENAME is modified in place while the substitute name is tried
   and is restored before scanning starts.  MODE is passed to fopen.
   If no file can be opened, the error is reported with perror and
   the program exits.  An empty FILENAME is left untouched and fails
   in fopen like any other unopenable name.  */

static void
scan_c_file (char *filename, const char *mode)
{
  FILE *infile;
  size_t len = strlen (filename);
  /* Address of the final character, or null for an empty name so
     that nothing before the start of the string is touched.  */
  char *last = len ? filename + len - 1 : NULL;
  char extension = last ? *last : '\0';

  if (extension == 'o')
    *last = 'c';

  infile = fopen (filename, mode);

  if (infile == NULL && extension == 'o')
    {
      /* Try .m.  */
      *last = 'm';
      infile = fopen (filename, mode);
      if (infile == NULL)
        *last = 'c'; /* Don't confuse people.  */
    }

  if (infile == NULL)
    {
      perror (filename);
      exit (EXIT_FAILURE);
    }

  /* Reset extension to be able to detect duplicate files.  */
  if (last)
    *last = extension;
  scan_c_stream (infile);
}
/* Return 1 if next input from INFILE is equal to P, -1 if EOF,
   0 if input doesn't match.

   Input is consumed up to and including the first byte that differs
   from P; nothing is pushed back.  Bytes are compared as unsigned
   char values, which is how getc reports them.  */

static int
stream_match (FILE *infile, const char *p)
{
  for (; *p; p++)
    {
      int c = getc (infile);
      if (c == EOF)
        return -1;
      if (c != (unsigned char) *p)
        return 0;
    }
  return 1;
}
832 static void
833 scan_c_stream (FILE *infile)
835 int commas, minargs, maxargs;
836 int c = '\n';
838 while (!feof (infile))
840 bool doc_keyword = false;
841 bool defunflag = false;
842 bool defvarperbufferflag = false;
843 bool defvarflag = false;
844 enum global_type type = INVALID;
845 static char name[sizeof input_buffer];
847 if (c != '\n' && c != '\r')
849 c = getc (infile);
850 continue;
852 c = getc (infile);
853 if (c == ' ')
855 while (c == ' ')
856 c = getc (infile);
857 if (c != 'D')
858 continue;
859 c = getc (infile);
860 if (c != 'E')
861 continue;
862 c = getc (infile);
863 if (c != 'F')
864 continue;
865 c = getc (infile);
866 if (c == 'S')
868 c = getc (infile);
869 if (c != 'Y')
870 continue;
871 c = getc (infile);
872 if (c != 'M')
873 continue;
874 c = getc (infile);
875 if (c != ' ' && c != '\t' && c != '(')
876 continue;
877 type = SYMBOL;
879 else if (c == 'V')
881 c = getc (infile);
882 if (c != 'A')
883 continue;
884 c = getc (infile);
885 if (c != 'R')
886 continue;
887 c = getc (infile);
888 if (c != '_')
889 continue;
891 defvarflag = true;
893 c = getc (infile);
894 defvarperbufferflag = (c == 'P');
895 if (generate_globals)
897 if (c == 'I')
898 type = EMACS_INTEGER;
899 else if (c == 'L')
900 type = LISP_OBJECT;
901 else if (c == 'B')
902 type = BOOLEAN;
905 c = getc (infile);
906 /* We need to distinguish between DEFVAR_BOOL and
907 DEFVAR_BUFFER_DEFAULTS. */
908 if (generate_globals && type == BOOLEAN && c != 'O')
909 type = INVALID;
911 else
912 continue;
914 else if (c == 'D')
916 c = getc (infile);
917 if (c != 'E')
918 continue;
919 c = getc (infile);
920 if (c != 'F')
921 continue;
922 c = getc (infile);
923 defunflag = c == 'U';
925 else continue;
927 if (generate_globals
928 && (!defvarflag || defvarperbufferflag || type == INVALID)
929 && !defunflag && type != SYMBOL)
930 continue;
932 while (c != '(')
934 if (c < 0)
935 goto eof;
936 c = getc (infile);
939 if (type != SYMBOL)
941 /* Lisp variable or function name. */
942 c = getc (infile);
943 if (c != '"')
944 continue;
945 c = read_c_string_or_comment (infile, -1, false, 0);
948 if (generate_globals)
950 ptrdiff_t i = 0;
951 char const *svalue = 0;
953 /* Skip "," and whitespace. */
956 c = getc (infile);
958 while (c == ',' || c_isspace (c));
960 /* Read in the identifier. */
963 if (c < 0)
964 goto eof;
965 input_buffer[i++] = c;
966 if (sizeof input_buffer <= i)
967 fatal ("identifier too long");
968 c = getc (infile);
970 while (! (c == ',' || c_isspace (c)));
972 input_buffer[i] = '\0';
973 memcpy (name, input_buffer, i + 1);
975 if (type == SYMBOL)
978 c = getc (infile);
979 while (c_isspace (c));
981 if (c != '"')
982 continue;
983 c = read_c_string_or_comment (infile, -1, false, 0);
984 svalue = input_buffer;
987 if (!defunflag)
989 add_global (type, name, 0, svalue);
990 continue;
994 if (type == SYMBOL)
995 continue;
997 /* DEFVAR_LISP ("name", addr, "doc")
998 DEFVAR_LISP ("name", addr /\* doc *\/)
999 DEFVAR_LISP ("name", addr, doc: /\* doc *\/) */
1001 if (defunflag)
1002 commas = generate_globals ? 4 : 5;
1003 else if (defvarperbufferflag)
1004 commas = 3;
1005 else if (defvarflag)
1006 commas = 1;
1007 else /* For DEFSIMPLE and DEFPRED. */
1008 commas = 2;
1010 while (commas)
1012 if (c == ',')
1014 commas--;
1016 if (defunflag && (commas == 1 || commas == 2))
1018 int scanned = 0;
1020 c = getc (infile);
1021 while (c_isspace (c));
1023 if (c < 0)
1024 goto eof;
1025 ungetc (c, infile);
1026 if (commas == 2) /* Pick up minargs. */
1027 scanned = fscanf (infile, "%d", &minargs);
1028 else /* Pick up maxargs. */
1029 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
1031 if (generate_globals)
1032 maxargs = (c == 'M') ? -1 : -2;
1033 else
1034 maxargs = -1;
1036 else
1037 scanned = fscanf (infile, "%d", &maxargs);
1038 if (scanned < 0)
1039 goto eof;
1043 if (c == EOF)
1044 goto eof;
1045 c = getc (infile);
1048 if (generate_globals)
1050 struct global *g = add_global (FUNCTION, name, maxargs, 0);
1051 if (!g)
1052 continue;
1054 /* The following code tries to recognize function attributes
1055 specified after the docstring, e.g.:
1057 DEFUN ("foo", Ffoo, Sfoo, X, Y, Z,
1058 doc: /\* doc *\/
1059 attributes: attribute1 attribute2 ...)
1060 (Lisp_Object arg...)
1062 Now only ’const’, ’noinline’ and 'noreturn' attributes
1063 are used. */
1065 /* Advance to the end of docstring. */
1066 c = getc (infile);
1067 if (c == EOF)
1068 goto eof;
1069 int d = getc (infile);
1070 if (d == EOF)
1071 goto eof;
1072 while (true)
1074 if (c == '*' && d == '/')
1075 break;
1076 c = d, d = getc (infile);
1077 if (d == EOF)
1078 goto eof;
1080 /* Skip spaces, if any. */
1083 c = getc (infile);
1084 if (c == EOF)
1085 goto eof;
1087 while (c_isspace (c));
1089 /* Check for 'attributes:' token. */
1090 if (c == 'a' && stream_match (infile, "ttributes:"))
1092 char *p = input_buffer;
1093 /* Collect attributes up to ')'. */
1094 while (true)
1096 c = getc (infile);
1097 if (c == EOF)
1098 goto eof;
1099 if (c == ')')
1100 break;
1101 if (p - input_buffer > sizeof (input_buffer))
1102 abort ();
1103 *p++ = c;
1105 *p = 0;
1106 if (strstr (input_buffer, "noreturn"))
1107 g->flags |= DEFUN_noreturn;
1108 if (strstr (input_buffer, "const"))
1109 g->flags |= DEFUN_const;
1110 if (strstr (input_buffer, "noinline"))
1111 g->flags |= DEFUN_noinline;
1113 continue;
1116 while (c_isspace (c))
1117 c = getc (infile);
1119 if (c == '"')
1120 c = read_c_string_or_comment (infile, 0, false, 0);
1122 while (c != EOF && c != ',' && c != '/')
1123 c = getc (infile);
1124 if (c == ',')
1127 c = getc (infile);
1128 while (c_isspace (c));
1130 while (c_isalpha (c))
1131 c = getc (infile);
1132 if (c == ':')
1134 doc_keyword = true;
1136 c = getc (infile);
1137 while (c_isspace (c));
1141 if (c == '"'
1142 || (c == '/'
1143 && (c = getc (infile),
1144 ungetc (c, infile),
1145 c == '*')))
1147 bool comment = c != '"';
1148 bool saw_usage;
1150 printf ("\037%c%s\n", defvarflag ? 'V' : 'F', input_buffer);
1152 if (comment)
1153 getc (infile); /* Skip past `*'. */
1154 c = read_c_string_or_comment (infile, 1, comment, &saw_usage);
1156 /* If this is a defun, find the arguments and print them. If
1157 this function takes MANY or UNEVALLED args, then the C source
1158 won't give the names of the arguments, so we shouldn't bother
1159 trying to find them.
1161 Various doc-string styles:
1162 0: DEFUN (..., "DOC") (args) [!comment]
1163 1: DEFUN (..., /\* DOC *\/ (args)) [comment && !doc_keyword]
1164 2: DEFUN (..., doc: /\* DOC *\/) (args) [comment && doc_keyword]
1166 if (defunflag && maxargs != -1 && !saw_usage)
1168 char argbuf[1024], *p = argbuf;
1170 if (!comment || doc_keyword)
1171 while (c != ')')
1173 if (c < 0)
1174 goto eof;
1175 c = getc (infile);
1178 /* Skip into arguments. */
1179 while (c != '(')
1181 if (c < 0)
1182 goto eof;
1183 c = getc (infile);
1185 /* Copy arguments into ARGBUF. */
1186 *p++ = c;
1189 c = getc (infile);
1190 if (c < 0)
1191 goto eof;
1192 *p++ = c;
1194 while (c != ')');
1196 *p = '\0';
1197 /* Output them. */
1198 fputs ("\n\n", stdout);
1199 write_c_args (input_buffer, argbuf, minargs, maxargs);
1201 else if (defunflag && maxargs == -1 && !saw_usage)
1202 /* The DOC should provide the usage form. */
1203 fprintf (stderr, "Missing 'usage' for function '%s'.\n",
1204 input_buffer);
1207 eof:
1208 if (ferror (infile) || fclose (infile) != 0)
1209 fatal ("read error");
/* Read a file of Lisp code, compiled or interpreted.
 Looks for
  (defun NAME ARGS DOCSTRING ...)
  (defmacro NAME ARGS DOCSTRING ...)
  (defsubst NAME ARGS DOCSTRING ...)
  (autoload (quote NAME) FILE DOCSTRING ...)
  (defvar NAME VALUE DOCSTRING)
  (defconst NAME VALUE DOCSTRING)
  (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
  (fset (quote NAME) #[... DOCSTRING ...])
  (defalias (quote NAME) #[... DOCSTRING ...])
  (custom-declare-variable (quote NAME) VALUE DOCSTRING ...)
 starting in column zero.
 (quote NAME) may appear as 'NAME as well.

 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
 When we find that, we save it for the following defining-form,
 and we use that instead of reading a doc string within that defining-form.

 For defvar, defconst, and fset we skip to the docstring with a kludgy
 formatting convention: all docstrings must appear on the same line as the
 initial open-paren (the one in column zero) and must contain a backslash
 and a newline immediately after the initial double-quote. No newlines
 must appear between the beginning of the form and the first double-quote.
 For defun, defmacro, and autoload, we know how to skip over the
 arglist, but the doc string must still have a backslash and newline
 immediately after the double quote.
 The only source files that must follow this convention are preloaded
 uncompiled ones like loaddefs.el; aside from that, it is always the .elc
 file that we should look at, and they are no problem because byte-compiler
 output follows this convention.
 The NAME and DOCSTRING are output.
 NAME is preceded by `F' for a function or `V' for a variable.
 An entry is output only if DOCSTRING has \ newline just after the opening ".
 */
/* Consume whitespace from INFILE and push back the first character
   that is not whitespace, so that it is the next one read.  */

static void
skip_white (FILE *infile)
{
  int c;
  do
    c = getc (infile);
  while (c_isspace (c));

  ungetc (c, infile);
}
/* Read a Lisp symbol from INFILE into BUFFER as a null-terminated
   string, skipping whitespace before and after it.

   The symbol ends at whitespace, `(', `)' or end of input; that
   character is pushed back.  A backslash makes the next character
   part of the symbol.  If no symbol characters were found, a
   diagnostic is written to stderr.

   BUFFER's size is not known here, so nothing stops a very long
   symbol from overrunning it; the caller must supply a buffer that
   is large enough.  */

static void
read_lisp_symbol (FILE *infile, char *buffer)
{
  int c;
  char *fillp = buffer;

  skip_white (infile);
  while (true)
    {
      c = getc (infile);
      if (c == '\\')
        {
          c = getc (infile);
          if (c < 0)
            {
              /* Input ended right after a backslash.  Terminate what
                 was read so the caller never sees an unterminated
                 string.  */
              *fillp = 0;
              return;
            }
          *fillp++ = c;
        }
      else if (c_isspace (c) || c == '(' || c == ')' || c < 0)
        {
          ungetc (c, infile);
          *fillp = 0;
          break;
        }
      else
        *fillp++ = c;
    }

  if (! buffer[0])
    fprintf (stderr, "## expected a symbol, got '%c'\n", c);

  skip_white (infile);
}
/* Read the rest of the current line from INFILE and report whether
   it ends with a double quote followed by a backslash, which marks
   the start of a doc string.

   Return true if so, with the line break consumed.  Otherwise return
   false and push the line break back onto INFILE.  */

static bool
search_lisp_doc_at_eol (FILE *infile)
{
  int c = 0, c1 = 0, c2 = 0;

  /* Skip until the end of line; remember two previous chars.  */
  while (c != '\n' && c != '\r' && c != EOF)
    {
      c2 = c1;
      c1 = c;
      c = getc (infile);
    }

  /* If two previous characters were " and \,
     this is a doc string.  Otherwise, there is none.  */
  if (c2 != '"' || c1 != '\\')
    {
#ifdef DEBUG
      fprintf (stderr, "## non-docstring found\n");
#endif
      ungetc (c, infile);
      return false;
    }
  return true;
}
/* Expand to an initializer holding the file name FN as a string and
   its length, not counting the terminating null.  */
#define DEF_ELISP_FILE(fn) { #fn, sizeof(#fn) - 1 }
1320 static void
1321 scan_lisp_file (const char *filename, const char *mode)
1323 FILE *infile;
1324 int c;
1325 char *saved_string = 0;
1326 /* These are the only files that are loaded uncompiled, and must
1327 follow the conventions of the doc strings expected by this
1328 function. These conventions are automatically followed by the
1329 byte compiler when it produces the .elc files. */
1330 static struct {
1331 const char *fn;
1332 int fl;
1333 } const uncompiled[] = {
1334 DEF_ELISP_FILE (loaddefs.el),
1335 DEF_ELISP_FILE (loadup.el),
1336 DEF_ELISP_FILE (charprop.el),
1337 DEF_ELISP_FILE (cp51932.el),
1338 DEF_ELISP_FILE (eucjp-ms.el)
1340 int i;
1341 int flen = strlen (filename);
1343 if (generate_globals)
1344 fatal ("scanning lisp file when -g specified");
1345 if (flen > 3 && !strcmp (filename + flen - 3, ".el"))
1347 bool match = false;
1348 for (i = 0; i < sizeof (uncompiled) / sizeof (uncompiled[0]); i++)
1350 if (uncompiled[i].fl <= flen
1351 && !strcmp (filename + flen - uncompiled[i].fl, uncompiled[i].fn)
1352 && (flen == uncompiled[i].fl
1353 || IS_SLASH (filename[flen - uncompiled[i].fl - 1])))
1355 match = true;
1356 break;
1359 if (!match)
1360 fatal ("uncompiled lisp file %s is not supported", filename);
1363 infile = fopen (filename, mode);
1364 if (infile == NULL)
1366 perror (filename);
1367 exit (EXIT_FAILURE);
1370 c = '\n';
1371 while (!feof (infile))
1373 char buffer[BUFSIZ];
1374 char type;
1376 /* If not at end of line, skip till we get to one. */
1377 if (c != '\n' && c != '\r')
1379 c = getc (infile);
1380 continue;
1382 /* Skip the line break. */
1383 while (c == '\n' || c == '\r')
1384 c = getc (infile);
1385 /* Detect a dynamic doc string and save it for the next expression. */
1386 if (c == '#')
1388 c = getc (infile);
1389 if (c == '@')
1391 ptrdiff_t length = 0;
1392 ptrdiff_t i;
1394 /* Read the length. */
1395 while ((c = getc (infile),
1396 c_isdigit (c)))
1398 if (INT_MULTIPLY_WRAPV (length, 10, &length)
1399 || INT_ADD_WRAPV (length, c - '0', &length)
1400 || SIZE_MAX < length)
1401 memory_exhausted ();
1404 if (length <= 1)
1405 fatal ("invalid dynamic doc string length");
1407 if (c != ' ')
1408 fatal ("space not found after dynamic doc string length");
1410 /* The next character is a space that is counted in the length
1411 but not part of the doc string.
1412 We already read it, so just ignore it. */
1413 length--;
1415 /* Read in the contents. */
1416 free (saved_string);
1417 saved_string = xmalloc (length);
1418 for (i = 0; i < length; i++)
1419 saved_string[i] = getc (infile);
1420 /* The last character is a ^_.
1421 That is needed in the .elc file
1422 but it is redundant in DOC. So get rid of it here. */
1423 saved_string[length - 1] = 0;
1424 /* Skip the line break. */
1425 while (c == '\n' || c == '\r')
1426 c = getc (infile);
1427 /* Skip the following line. */
1428 while (! (c == '\n' || c == '\r' || c < 0))
1429 c = getc (infile);
1431 continue;
1434 if (c != '(')
1435 continue;
1437 read_lisp_symbol (infile, buffer);
1439 if (! strcmp (buffer, "defun")
1440 || ! strcmp (buffer, "defmacro")
1441 || ! strcmp (buffer, "defsubst"))
1443 type = 'F';
1444 read_lisp_symbol (infile, buffer);
1446 /* Skip the arguments: either "nil" or a list in parens. */
1448 c = getc (infile);
1449 if (c == 'n') /* nil */
1451 if ((c = getc (infile)) != 'i'
1452 || (c = getc (infile)) != 'l')
1454 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1455 buffer, filename);
1456 continue;
1459 else if (c != '(')
1461 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1462 buffer, filename);
1463 continue;
1465 else
1466 while (! (c == ')' || c < 0))
1467 c = getc (infile);
1468 skip_white (infile);
1470 /* If the next three characters aren't `dquote bslash newline'
1471 then we're not reading a docstring.
1473 if ((c = getc (infile)) != '"'
1474 || (c = getc (infile)) != '\\'
1475 || ((c = getc (infile)) != '\n' && c != '\r'))
1477 #ifdef DEBUG
1478 fprintf (stderr, "## non-docstring in %s (%s)\n",
1479 buffer, filename);
1480 #endif
1481 continue;
1485 /* defcustom can only occur in uncompiled Lisp files. */
1486 else if (! strcmp (buffer, "defvar")
1487 || ! strcmp (buffer, "defconst")
1488 || ! strcmp (buffer, "defcustom"))
1490 type = 'V';
1491 read_lisp_symbol (infile, buffer);
1493 if (saved_string == 0)
1494 if (!search_lisp_doc_at_eol (infile))
1495 continue;
1498 else if (! strcmp (buffer, "custom-declare-variable")
1499 || ! strcmp (buffer, "defvaralias")
1502 type = 'V';
1504 c = getc (infile);
1505 if (c == '\'')
1506 read_lisp_symbol (infile, buffer);
1507 else
1509 if (c != '(')
1511 fprintf (stderr,
1512 "## unparsable name in custom-declare-variable in %s\n",
1513 filename);
1514 continue;
1516 read_lisp_symbol (infile, buffer);
1517 if (strcmp (buffer, "quote"))
1519 fprintf (stderr,
1520 "## unparsable name in custom-declare-variable in %s\n",
1521 filename);
1522 continue;
1524 read_lisp_symbol (infile, buffer);
1525 c = getc (infile);
1526 if (c != ')')
1528 fprintf (stderr,
1529 "## unparsable quoted name in custom-declare-variable in %s\n",
1530 filename);
1531 continue;
1535 if (saved_string == 0)
1536 if (!search_lisp_doc_at_eol (infile))
1537 continue;
1540 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
1542 type = 'F';
1544 c = getc (infile);
1545 if (c == '\'')
1546 read_lisp_symbol (infile, buffer);
1547 else
1549 if (c != '(')
1551 fprintf (stderr, "## unparsable name in fset in %s\n",
1552 filename);
1553 continue;
1555 read_lisp_symbol (infile, buffer);
1556 if (strcmp (buffer, "quote"))
1558 fprintf (stderr, "## unparsable name in fset in %s\n",
1559 filename);
1560 continue;
1562 read_lisp_symbol (infile, buffer);
1563 c = getc (infile);
1564 if (c != ')')
1566 fprintf (stderr,
1567 "## unparsable quoted name in fset in %s\n",
1568 filename);
1569 continue;
1573 if (saved_string == 0)
1574 if (!search_lisp_doc_at_eol (infile))
1575 continue;
1578 else if (! strcmp (buffer, "autoload"))
1580 type = 'F';
1581 c = getc (infile);
1582 if (c == '\'')
1583 read_lisp_symbol (infile, buffer);
1584 else
1586 if (c != '(')
1588 fprintf (stderr, "## unparsable name in autoload in %s\n",
1589 filename);
1590 continue;
1592 read_lisp_symbol (infile, buffer);
1593 if (strcmp (buffer, "quote"))
1595 fprintf (stderr, "## unparsable name in autoload in %s\n",
1596 filename);
1597 continue;
1599 read_lisp_symbol (infile, buffer);
1600 c = getc (infile);
1601 if (c != ')')
1603 fprintf (stderr,
1604 "## unparsable quoted name in autoload in %s\n",
1605 filename);
1606 continue;
1609 skip_white (infile);
1610 c = getc (infile);
1611 if (c != '\"')
1613 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
1614 buffer, filename);
1615 continue;
1617 read_c_string_or_comment (infile, 0, false, 0);
1619 if (saved_string == 0)
1620 if (!search_lisp_doc_at_eol (infile))
1621 continue;
1624 #ifdef DEBUG
1625 else if (! strcmp (buffer, "if")
1626 || ! strcmp (buffer, "byte-code"))
1627 continue;
1628 #endif
1630 else
1632 #ifdef DEBUG
1633 fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
1634 buffer, filename);
1635 #endif
1636 continue;
1639 /* At this point, we should either use the previous dynamic doc string in
1640 saved_string or gobble a doc string from the input file.
1641 In the latter case, the opening quote (and leading backslash-newline)
1642 have already been read. */
1644 printf ("\037%c%s\n", type, buffer);
1645 if (saved_string)
1647 fputs (saved_string, stdout);
1648 /* Don't use one dynamic doc string twice. */
1649 free (saved_string);
1650 saved_string = 0;
1652 else
1653 read_c_string_or_comment (infile, 1, false, 0);
1655 free (saved_string);
1656 if (ferror (infile) || fclose (infile) != 0)
1657 fatal ("%s: read error", filename);
1661 /* make-docfile.c ends here */