Assign correct general-category and names to surrogates
[emacs.git] / lib-src / make-docfile.c
blob bada8df9f72de78968fc52aa57d1e64b058baf42
1 /* Generate doc-string file for GNU Emacs from source files.
3 Copyright (C) 1985-1986, 1992-1994, 1997, 1999-2015 Free Software
4 Foundation, Inc.
6 This file is part of GNU Emacs.
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
22 /* The arguments given to this program are all the C and Lisp source files
23 of GNU Emacs. .elc and .el and .c files are allowed.
24 A .o file can also be specified; the .c file it was made from is used.
25 This helps the makefile pass the correct list of files.
26 Option -d DIR means change to DIR before looking for files.
28 The results, which go to standard output or to a file
29 specified with -a or -o (-a to append, -o to start from nothing),
30 are entries containing function or variable names and their documentation.
31 Each entry starts with a ^_ character.
32 Then comes F for a function or V for a variable.
33 Then comes the function or variable name, terminated with a newline.
34 Then comes the documentation for that function or variable.
37 #include <config.h>
39 #include <stdbool.h>
40 #include <stdio.h>
41 #include <stdlib.h> /* config.h unconditionally includes this anyway */
43 #ifdef WINDOWSNT
44 /* Defined to be sys_fopen in ms-w32.h, but only #ifdef emacs, so this
45 is really just insurance. */
46 #undef fopen
47 #include <direct.h>
48 #endif /* WINDOWSNT */
50 #include <binary-io.h>
52 #ifdef DOS_NT
53 /* Defined to be sys_chdir in ms-w32.h, but only #ifdef emacs, so this
54 is really just insurance.
56 Similarly, msdos defines this as sys_chdir, but we're not linking with the
57 file where that function is defined. */
58 #undef chdir
59 #define IS_SLASH(c) ((c) == '/' || (c) == '\\' || (c) == ':')
60 #else /* not DOS_NT */
61 #define IS_SLASH(c) ((c) == '/')
62 #endif /* not DOS_NT */
64 static int scan_file (char *filename);
65 static int scan_lisp_file (const char *filename, const char *mode);
66 static int scan_c_file (char *filename, const char *mode);
67 static int scan_c_stream (FILE *infile);
68 static void start_globals (void);
69 static void write_globals (void);
71 #include <unistd.h>
73 /* Name this program was invoked with. */
74 char *progname;
76 /* Nonzero if this invocation is generating globals.h. */
77 int generate_globals;
79 /* Print error message. `s1' is printf control string, `s2' is arg for it. */
81 /* VARARGS1 */
82 static void
83 error (const char *s1, const char *s2)
85 fprintf (stderr, "%s: ", progname);
86 fprintf (stderr, s1, s2);
87 fprintf (stderr, "\n");
/* Report a diagnostic exactly as `error' does (S1 is the control
   string, S2 its argument), then terminate the program with a
   failure status.  Never returns.  */

static _Noreturn void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
/* Like malloc, but exit with a fatal error if memory is exhausted.
   SIZE is a size_t so that callers passing size_t byte counts are not
   silently truncated.  A request for zero bytes is rounded up to one
   byte, so that an implementation returning NULL for malloc (0) is
   not mistaken for memory exhaustion.  The result is never NULL.  */

static void *
xmalloc (size_t size)
{
  void *result = malloc (size ? size : 1);
  if (result == NULL)
    fatal ("virtual memory exhausted", NULL);
  return result;
}
/* Return a freshly allocated copy of the string S.  If the copy
   cannot be allocated, report a fatal error instead of returning.  */

static char *
xstrdup (char *s)
{
  char *copy = strdup (s);

  if (copy == NULL)
    fatal ("virtual memory exhausted", 0);
  return copy;
}
/* Like realloc, but exit with a fatal error if memory is exhausted.
   ARG is the block to resize and SIZE the new byte count.  SIZE is a
   size_t so that products such as N * sizeof (T) are not truncated.
   A zero SIZE is rounded up to one byte so that the call never
   degenerates into an implementation-defined realloc (ARG, 0).  */

static void *
xrealloc (void *arg, size_t size)
{
  void *result = realloc (arg, size ? size : 1);
  if (result == NULL)
    fatal ("virtual memory exhausted", NULL);
  return result;
}
135 main (int argc, char **argv)
137 int i;
138 int err_count = 0;
140 progname = argv[0];
142 /* If first two args are -o FILE, output to FILE. */
143 i = 1;
144 if (argc > i + 1 && !strcmp (argv[i], "-o"))
146 if (! freopen (argv[i + 1], "w", stdout))
148 perror (argv[i + 1]);
149 return EXIT_FAILURE;
151 i += 2;
153 if (argc > i + 1 && !strcmp (argv[i], "-a"))
155 if (! freopen (argv[i + 1], "a", stdout))
157 perror (argv[i + 1]);
158 return EXIT_FAILURE;
160 i += 2;
162 if (argc > i + 1 && !strcmp (argv[i], "-d"))
164 if (chdir (argv[i + 1]) != 0)
166 perror (argv[i + 1]);
167 return EXIT_FAILURE;
169 i += 2;
171 if (argc > i && !strcmp (argv[i], "-g"))
173 generate_globals = 1;
174 ++i;
177 set_binary_mode (fileno (stdout), O_BINARY);
179 if (generate_globals)
180 start_globals ();
182 if (argc <= i)
183 scan_c_stream (stdin);
184 else
186 int first_infile = i;
187 for (; i < argc; i++)
189 int j;
190 /* Don't process one file twice. */
191 for (j = first_infile; j < i; j++)
192 if (strcmp (argv[i], argv[j]) == 0)
193 break;
194 if (j == i)
195 err_count += scan_file (argv[i]);
199 if (err_count == 0 && generate_globals)
200 write_globals ();
202 return (err_count > 0 ? EXIT_FAILURE : EXIT_SUCCESS);
/* Write a source-file boundary marker to stdout: a ^_ character, the
   letter S, the part of FILENAME after its last directory separator,
   and a newline.  */
static void
put_filename (char *filename)
{
  const char *base = filename;
  const char *scan;

  for (scan = filename; *scan != '\0'; scan++)
    if (IS_DIRECTORY_SEP (*scan))
      base = scan + 1;

  printf ("\037S%s\n", base);
}
220 /* Read file FILENAME and output its doc strings to stdout.
221 Return 1 if file is not found, 0 if it is found. */
223 static int
224 scan_file (char *filename)
227 size_t len = strlen (filename);
229 if (!generate_globals)
230 put_filename (filename);
231 if (len > 4 && !strcmp (filename + len - 4, ".elc"))
232 return scan_lisp_file (filename, "rb");
233 else if (len > 3 && !strcmp (filename + len - 3, ".el"))
234 return scan_lisp_file (filename, "r");
235 else
236 return scan_c_file (filename, "r");
/* Write the opening lines of the generated globals listing to
   stdout: two comment lines followed by the start of the
   `struct emacs_globals' declaration.  */
static void
start_globals (void)
{
  fputs ("/* This file was auto-generated by make-docfile. */\n"
         "/* DO NOT EDIT. */\n"
         "struct emacs_globals {\n",
         stdout);
}
247 static char input_buffer[128];
/* Bookkeeping shared by `read_c_string_or_comment' and its helpers
   while one string or comment is being read.  */
struct rcsoc_state
{
  /* Number of spaces that have been read but not yet output.  */
  unsigned pending_spaces;
  /* Number of newlines that have been read but not yet output.  */
  unsigned pending_newlines;

  /* The stream being read.  */
  FILE *in_file;

  /* When non-null, the next position in a buffer that receives a
     copy of each output character.  */
  char *buf_ptr;
  /* When non-null, a stream that receives each output character.  */
  FILE *out_file;

  /* When non-null, a keyword to look for at the beginning of lines.
     An occurrence is not copied to the output; SAW_KEYWORD is set
     instead.  */
  const char *keyword;
  /* How far into KEYWORD the input has matched so far.  */
  const char *cur_keyword_ptr;
  /* Nonzero once a complete occurrence of KEYWORD has been seen.  */
  int saw_keyword;
};
273 /* Output CH to the file or buffer in STATE. Any pending newlines or
274 spaces are output first. */
276 static void
277 put_char (int ch, struct rcsoc_state *state)
279 int out_ch;
282 if (state->pending_newlines > 0)
284 state->pending_newlines--;
285 out_ch = '\n';
287 else if (state->pending_spaces > 0)
289 state->pending_spaces--;
290 out_ch = ' ';
292 else
293 out_ch = ch;
295 if (state->out_file)
296 putc (out_ch, state->out_file);
297 if (state->buf_ptr)
298 *state->buf_ptr++ = out_ch;
300 while (out_ch != ch);
303 /* If in the middle of scanning a keyword, continue scanning with
304 character CH, otherwise output CH to the file or buffer in STATE.
305 Any pending newlines or spaces are output first, as well as any
306 previously scanned characters that were thought to be part of a
307 keyword, but were in fact not. */
309 static void
310 scan_keyword_or_put_char (int ch, struct rcsoc_state *state)
312 if (state->keyword
313 && *state->cur_keyword_ptr == ch
314 && (state->cur_keyword_ptr > state->keyword
315 || state->pending_newlines > 0))
316 /* We might be looking at STATE->keyword at some point.
317 Keep looking until we know for sure. */
319 if (*++state->cur_keyword_ptr == '\0')
320 /* Saw the whole keyword. Set SAW_KEYWORD flag to true. */
322 state->saw_keyword = 1;
324 /* Reset the scanning pointer. */
325 state->cur_keyword_ptr = state->keyword;
327 /* Canonicalize whitespace preceding a usage string. */
328 state->pending_newlines = 2;
329 state->pending_spaces = 0;
331 /* Skip any whitespace between the keyword and the
332 usage string. */
334 ch = getc (state->in_file);
335 while (ch == ' ' || ch == '\n');
337 /* Output the open-paren we just read. */
338 put_char (ch, state);
340 /* Skip the function name and replace it with `fn'. */
342 ch = getc (state->in_file);
343 while (ch != ' ' && ch != ')');
344 put_char ('f', state);
345 put_char ('n', state);
347 /* Put back the last character. */
348 ungetc (ch, state->in_file);
351 else
353 if (state->keyword && state->cur_keyword_ptr > state->keyword)
354 /* We scanned the beginning of a potential usage
355 keyword, but it was a false alarm. Output the
356 part we scanned. */
358 const char *p;
360 for (p = state->keyword; p < state->cur_keyword_ptr; p++)
361 put_char (*p, state);
363 state->cur_keyword_ptr = state->keyword;
366 put_char (ch, state);
371 /* Skip a C string or C-style comment from INFILE, and return the
372 character that follows. COMMENT non-zero means skip a comment. If
373 PRINTFLAG is positive, output string contents to stdout. If it is
374 negative, store contents in buf. Convert escape sequences \n and
375 \t to newline and tab; discard \ followed by newline.
376 If SAW_USAGE is non-zero, then any occurrences of the string `usage:'
377 at the beginning of a line will be removed, and *SAW_USAGE set to
378 true if any were encountered. */
380 static int
381 read_c_string_or_comment (FILE *infile, int printflag, int comment, int *saw_usage)
383 register int c;
384 struct rcsoc_state state;
386 state.in_file = infile;
387 state.buf_ptr = (printflag < 0 ? input_buffer : 0);
388 state.out_file = (printflag > 0 ? stdout : 0);
389 state.pending_spaces = 0;
390 state.pending_newlines = 0;
391 state.keyword = (saw_usage ? "usage:" : 0);
392 state.cur_keyword_ptr = state.keyword;
393 state.saw_keyword = 0;
395 c = getc (infile);
396 if (comment)
397 while (c == '\n' || c == '\r' || c == '\t' || c == ' ')
398 c = getc (infile);
400 while (c != EOF)
402 while (c != EOF && (comment ? c != '*' : c != '"'))
404 if (c == '\\')
406 c = getc (infile);
407 if (c == '\n' || c == '\r')
409 c = getc (infile);
410 continue;
412 if (c == 'n')
413 c = '\n';
414 if (c == 't')
415 c = '\t';
418 if (c == ' ')
419 state.pending_spaces++;
420 else if (c == '\n')
422 state.pending_newlines++;
423 state.pending_spaces = 0;
425 else
426 scan_keyword_or_put_char (c, &state);
428 c = getc (infile);
431 if (c != EOF)
432 c = getc (infile);
434 if (comment)
436 if (c == '/')
438 c = getc (infile);
439 break;
442 scan_keyword_or_put_char ('*', &state);
444 else
446 if (c != '"')
447 break;
449 /* If we had a "", concatenate the two strings. */
450 c = getc (infile);
454 if (printflag < 0)
455 *state.buf_ptr = 0;
457 if (saw_usage)
458 *saw_usage = state.saw_keyword;
460 return c;
/* Write to stdout the Lisp-style argument list of function FUNC,
   derived from the C parameter list text in BUF (which may start
   with an open paren).  The output has the form "(fn ARG ...)".

   For each parameter, the last identifier seen before the `,' or `)'
   is taken as its name; it is printed upcased with `_' replaced by
   `-'.  The C spelling `defalt' is printed as DEFAULT.  A parameter
   list of exactly `void' produces no arguments.  MINARGS and MAXARGS
   are the minimum and maximum number of arguments; "&optional" is
   inserted before the first argument past MINARGS.

   An empty parameter is reported with `error' and skipped.  */

static void
write_c_args (char *func, char *buf, int minargs, int maxargs)
{
  char *p;
  int in_ident = 0;
  char *ident_start = NULL;
  size_t ident_length = 0;

  fputs ("(fn", stdout);

  if (*buf == '(')
    ++buf;

  for (p = buf; *p; p++)
    {
      char c = *p;
      int ident_char = (('A' <= c && c <= 'Z')
                        || ('a' <= c && c <= 'z')
                        || ('0' <= c && c <= '9')
                        || c == '_');

      /* Notice when an identifier starts or ends.  */
      if (ident_char != in_ident)
        {
          if (!in_ident)
            {
              in_ident = 1;
              ident_start = p;
            }
          else
            {
              in_ident = 0;
              ident_length = p - ident_start;
            }
        }

      /* Found the end of an argument, write out the last seen
         identifier.  */
      if (c == ',' || c == ')')
        {
          size_t i;

          if (ident_length == 0)
            {
              error ("empty arg list for `%s' should be (void), not ()", func);
              continue;
            }

          /* Only the exact word `void' means "no arguments"; a
             length-limited strncmp would also match the parameter
             names `v', `vo' and `voi'.  */
          if (ident_length == 4 && memcmp (ident_start, "void", 4) == 0)
            continue;

          putchar (' ');

          if (minargs == 0 && maxargs > 0)
            fputs ("&optional ", stdout);

          minargs--;
          maxargs--;

          /* In C code, `default' is a reserved word, so we spell it
             `defalt'; demangle that here.  */
          if (ident_length == 6 && memcmp (ident_start, "defalt", 6) == 0)
            fputs ("DEFAULT", stdout);
          else
            for (i = 0; i < ident_length; i++)
              {
                c = ident_start[i];
                if (c >= 'a' && c <= 'z')
                  /* Upcase the letter.  */
                  c += 'A' - 'a';
                else if (c == '_')
                  /* Print underscore as hyphen.  */
                  c = '-';
                putchar (c);
              }

          /* This identifier has been consumed.  Forget it so that a
             following empty argument is diagnosed instead of reusing
             a stale start and length.  */
          ident_length = 0;
        }
    }

  putchar (')');
}
/* Kinds of global recorded while scanning C sources.  The numeric
   order matters: `compare_globals' sorts by it, so the variable kinds
   (roughly in decreasing alignment order, to avoid allocation gaps)
   come before symbols and functions, which are last.  */
enum global_type
{
  INVALID,              /* Not a recognized kind.  */
  LISP_OBJECT,          /* Declared as Lisp_Object.  */
  EMACS_INTEGER,        /* Declared as EMACS_INT.  */
  BOOLEAN,              /* Declared as bool.  */
  SYMBOL,               /* A DEFSYM symbol.  */
  FUNCTION              /* A DEFUN function.  */
};
560 /* A single global. */
561 struct global
563 enum global_type type;
564 char *name;
565 int flags;
566 union
568 int value;
569 char const *svalue;
570 } v;
573 /* Bit values for FLAGS field from the above. Applied for DEFUNs only. */
574 enum { DEFUN_noreturn = 1, DEFUN_const = 2 };
576 /* All the variable names we saw while scanning C sources in `-g'
577 mode. */
578 int num_globals;
579 int num_globals_allocated;
580 struct global *globals;
582 static struct global *
583 add_global (enum global_type type, char *name, int value, char const *svalue)
585 /* Ignore the one non-symbol that can occur. */
586 if (strcmp (name, "..."))
588 ++num_globals;
590 if (num_globals_allocated == 0)
592 num_globals_allocated = 100;
593 globals = xmalloc (num_globals_allocated * sizeof (struct global));
595 else if (num_globals == num_globals_allocated)
597 num_globals_allocated *= 2;
598 globals = xrealloc (globals,
599 num_globals_allocated * sizeof (struct global));
602 globals[num_globals - 1].type = type;
603 globals[num_globals - 1].name = name;
604 if (svalue)
605 globals[num_globals - 1].v.svalue = svalue;
606 else
607 globals[num_globals - 1].v.value = value;
608 globals[num_globals - 1].flags = 0;
609 return globals + num_globals - 1;
611 return NULL;
614 static int
615 compare_globals (const void *a, const void *b)
617 const struct global *ga = a;
618 const struct global *gb = b;
620 if (ga->type != gb->type)
621 return ga->type - gb->type;
623 /* Consider "nil" to be the least, so that iQnil is zero. That
624 way, Qnil's internal representation is zero, which is a bit faster. */
625 if (ga->type == SYMBOL)
627 bool a_nil = strcmp (ga->name, "Qnil") == 0;
628 bool b_nil = strcmp (gb->name, "Qnil") == 0;
629 if (a_nil | b_nil)
630 return b_nil - a_nil;
633 return strcmp (ga->name, gb->name);
/* Write to stdout the end of the `struct emacs_globals' declaration,
   the extern declaration of `globals', and the declaration of the
   `lispsym' array with NUM_SYMBOLS elements.  */
static void
close_emacs_globals (int num_symbols)
{
  fputs ("};\n"
         "extern struct emacs_globals globals;\n"
         "\n"
         "#ifndef DEFINE_SYMBOLS\n"
         "extern\n"
         "#endif\n",
         stdout);
  printf ("struct Lisp_Symbol alignas (GCALIGNMENT) lispsym[%d];\n",
          num_symbols);
}
649 static void
650 write_globals (void)
652 int i, j;
653 bool seen_defun = false;
654 int symnum = 0;
655 int num_symbols = 0;
656 qsort (globals, num_globals, sizeof (struct global), compare_globals);
658 j = 0;
659 for (i = 0; i < num_globals; i++)
661 while (i + 1 < num_globals
662 && strcmp (globals[i].name, globals[i + 1].name) == 0)
664 if (globals[i].type == FUNCTION
665 && globals[i].v.value != globals[i + 1].v.value)
666 error ("function '%s' defined twice with differing signatures",
667 globals[i].name);
668 i++;
670 num_symbols += globals[i].type == SYMBOL;
671 globals[j++] = globals[i];
673 num_globals = j;
675 for (i = 0; i < num_globals; ++i)
677 char const *type = 0;
679 switch (globals[i].type)
681 case EMACS_INTEGER:
682 type = "EMACS_INT";
683 break;
684 case BOOLEAN:
685 type = "bool";
686 break;
687 case LISP_OBJECT:
688 type = "Lisp_Object";
689 break;
690 case SYMBOL:
691 case FUNCTION:
692 if (!seen_defun)
694 close_emacs_globals (num_symbols);
695 putchar ('\n');
696 seen_defun = true;
698 break;
699 default:
700 fatal ("not a recognized DEFVAR_", 0);
703 if (type)
705 printf (" %s f_%s;\n", type, globals[i].name);
706 printf ("#define %s globals.f_%s\n",
707 globals[i].name, globals[i].name);
709 else if (globals[i].type == SYMBOL)
710 printf (("#define i%s %d\n"
711 "DEFINE_LISP_SYMBOL (%s)\n"),
712 globals[i].name, symnum++, globals[i].name);
713 else
715 if (globals[i].flags & DEFUN_noreturn)
716 fputs ("_Noreturn ", stdout);
718 printf ("EXFUN (%s, ", globals[i].name);
719 if (globals[i].v.value == -1)
720 fputs ("MANY", stdout);
721 else if (globals[i].v.value == -2)
722 fputs ("UNEVALLED", stdout);
723 else
724 printf ("%d", globals[i].v.value);
725 putchar (')');
727 if (globals[i].flags & DEFUN_const)
728 fputs (" ATTRIBUTE_CONST", stdout);
730 puts (";");
734 if (!seen_defun)
735 close_emacs_globals (num_symbols);
737 puts ("#ifdef DEFINE_SYMBOLS");
738 puts ("static char const *const defsym_name[] = {");
739 for (int i = 0; i < num_globals; i++)
740 if (globals[i].type == SYMBOL)
741 printf ("\t\"%s\",\n", globals[i].v.svalue);
742 puts ("};");
743 puts ("#endif");
745 puts ("#define Qnil builtin_lisp_symbol (0)");
746 puts ("#if DEFINE_NON_NIL_Q_SYMBOL_MACROS");
747 num_symbols = 0;
748 for (int i = 0; i < num_globals; i++)
749 if (globals[i].type == SYMBOL && num_symbols++ != 0)
750 printf ("# define %s builtin_lisp_symbol (%d)\n",
751 globals[i].name, num_symbols - 1);
752 puts ("#endif");
/* Open the C source file FILENAME with fopen mode MODE and scan it
   with `scan_c_stream'.

   If FILENAME ends in `o', the name is temporarily rewritten in place
   to end in `c' and, if that cannot be opened, in `m'.  On success
   the original last character is restored before scanning, so that
   the caller can still detect duplicate file names.  If no file can
   be opened, the failure is reported with perror and 0 is returned:
   a missing input file is not counted as an error.

   An empty FILENAME is handled like any other unopenable file rather
   than indexing before the start of the string.  */

static int
scan_c_file (char *filename, const char *mode)
{
  FILE *infile;
  size_t len = strlen (filename);
  /* The last character of the name, or NULL for an empty name.  */
  char *last = len > 0 ? &filename[len - 1] : NULL;
  int extension = last ? *last : '\0';

  if (extension == 'o')
    *last = 'c';

  infile = fopen (filename, mode);

  if (infile == NULL && extension == 'o')
    {
      /* Try .m.  */
      *last = 'm';
      infile = fopen (filename, mode);
      if (infile == NULL)
        *last = 'c';            /* Don't confuse people.  */
    }

  /* No error if non-ex input file.  */
  if (infile == NULL)
    {
      perror (filename);
      return 0;
    }

  /* Reset extension to be able to detect duplicate files.  */
  if (last)
    *last = extension;
  return scan_c_stream (infile);
}
/* Compare the upcoming characters of INFILE against the string P,
   consuming input as it goes.  Return 1 if all of P was matched, 0
   at the first character that differs (that character has been
   consumed), and -1 if end of input is reached first.  */

static int
stream_match (FILE *infile, const char *p)
{
  while (*p != '\0')
    {
      int got = getc (infile);

      if (got == EOF)
        return -1;
      if (got != *p++)
        return 0;
    }
  return 1;
}
810 static int
811 scan_c_stream (FILE *infile)
813 int commas, minargs, maxargs;
814 int c = '\n';
816 while (!feof (infile))
818 int doc_keyword = 0;
819 int defunflag = 0;
820 int defvarperbufferflag = 0;
821 int defvarflag = 0;
822 enum global_type type = INVALID;
823 char *name IF_LINT (= 0);
825 if (c != '\n' && c != '\r')
827 c = getc (infile);
828 continue;
830 c = getc (infile);
831 if (c == ' ')
833 while (c == ' ')
834 c = getc (infile);
835 if (c != 'D')
836 continue;
837 c = getc (infile);
838 if (c != 'E')
839 continue;
840 c = getc (infile);
841 if (c != 'F')
842 continue;
843 c = getc (infile);
844 if (c == 'S')
846 c = getc (infile);
847 if (c != 'Y')
848 continue;
849 c = getc (infile);
850 if (c != 'M')
851 continue;
852 c = getc (infile);
853 if (c != ' ' && c != '\t' && c != '(')
854 continue;
855 type = SYMBOL;
857 else if (c == 'V')
859 c = getc (infile);
860 if (c != 'A')
861 continue;
862 c = getc (infile);
863 if (c != 'R')
864 continue;
865 c = getc (infile);
866 if (c != '_')
867 continue;
869 defvarflag = 1;
871 c = getc (infile);
872 defvarperbufferflag = (c == 'P');
873 if (generate_globals)
875 if (c == 'I')
876 type = EMACS_INTEGER;
877 else if (c == 'L')
878 type = LISP_OBJECT;
879 else if (c == 'B')
880 type = BOOLEAN;
883 c = getc (infile);
884 /* We need to distinguish between DEFVAR_BOOL and
885 DEFVAR_BUFFER_DEFAULTS. */
886 if (generate_globals && type == BOOLEAN && c != 'O')
887 type = INVALID;
889 else
890 continue;
892 else if (c == 'D')
894 c = getc (infile);
895 if (c != 'E')
896 continue;
897 c = getc (infile);
898 if (c != 'F')
899 continue;
900 c = getc (infile);
901 defunflag = c == 'U';
903 else continue;
905 if (generate_globals
906 && (!defvarflag || defvarperbufferflag || type == INVALID)
907 && !defunflag && type != SYMBOL)
908 continue;
910 while (c != '(')
912 if (c < 0)
913 goto eof;
914 c = getc (infile);
917 if (type != SYMBOL)
919 /* Lisp variable or function name. */
920 c = getc (infile);
921 if (c != '"')
922 continue;
923 c = read_c_string_or_comment (infile, -1, 0, 0);
926 if (generate_globals)
928 int i = 0;
929 char const *svalue = 0;
931 /* Skip "," and whitespace. */
934 c = getc (infile);
936 while (c == ',' || c == ' ' || c == '\t' || c == '\n' || c == '\r');
938 /* Read in the identifier. */
941 if (c < 0)
942 goto eof;
943 input_buffer[i++] = c;
944 c = getc (infile);
946 while (! (c == ',' || c == ' ' || c == '\t'
947 || c == '\n' || c == '\r'));
948 input_buffer[i] = '\0';
950 name = xmalloc (i + 1);
951 memcpy (name, input_buffer, i + 1);
953 if (type == SYMBOL)
956 c = getc (infile);
957 while (c == ' ' || c == '\t' || c == '\n' || c == '\r');
958 if (c != '"')
959 continue;
960 c = read_c_string_or_comment (infile, -1, 0, 0);
961 svalue = xstrdup (input_buffer);
964 if (!defunflag)
966 add_global (type, name, 0, svalue);
967 continue;
971 if (type == SYMBOL)
972 continue;
974 /* DEFVAR_LISP ("name", addr, "doc")
975 DEFVAR_LISP ("name", addr /\* doc *\/)
976 DEFVAR_LISP ("name", addr, doc: /\* doc *\/) */
978 if (defunflag)
979 commas = generate_globals ? 4 : 5;
980 else if (defvarperbufferflag)
981 commas = 3;
982 else if (defvarflag)
983 commas = 1;
984 else /* For DEFSIMPLE and DEFPRED. */
985 commas = 2;
987 while (commas)
989 if (c == ',')
991 commas--;
993 if (defunflag && (commas == 1 || commas == 2))
995 int scanned = 0;
997 c = getc (infile);
998 while (c == ' ' || c == '\n' || c == '\r' || c == '\t');
999 if (c < 0)
1000 goto eof;
1001 ungetc (c, infile);
1002 if (commas == 2) /* Pick up minargs. */
1003 scanned = fscanf (infile, "%d", &minargs);
1004 else /* Pick up maxargs. */
1005 if (c == 'M' || c == 'U') /* MANY || UNEVALLED */
1007 if (generate_globals)
1008 maxargs = (c == 'M') ? -1 : -2;
1009 else
1010 maxargs = -1;
1012 else
1013 scanned = fscanf (infile, "%d", &maxargs);
1014 if (scanned < 0)
1015 goto eof;
1019 if (c == EOF)
1020 goto eof;
1021 c = getc (infile);
1024 if (generate_globals)
1026 struct global *g = add_global (FUNCTION, name, maxargs, 0);
1028 /* The following code tries to recognize function attributes
1029 specified after the docstring, e.g.:
1031 DEFUN ("foo", Ffoo, Sfoo, X, Y, Z,
1032 doc: /\* doc *\/
1033 attributes: attribute1 attribute2 ...)
1034 (Lisp_Object arg...)
1036 Now only 'noreturn' and 'const' attributes are used. */
1038 /* Advance to the end of docstring. */
1039 c = getc (infile);
1040 if (c == EOF)
1041 goto eof;
1042 int d = getc (infile);
1043 if (d == EOF)
1044 goto eof;
1045 while (1)
1047 if (c == '*' && d == '/')
1048 break;
1049 c = d, d = getc (infile);
1050 if (d == EOF)
1051 goto eof;
1053 /* Skip spaces, if any. */
1056 c = getc (infile);
1057 if (c == EOF)
1058 goto eof;
1060 while (c == ' ' || c == '\n' || c == '\r' || c == '\t');
1061 /* Check for 'attributes:' token. */
1062 if (c == 'a' && stream_match (infile, "ttributes:"))
1064 char *p = input_buffer;
1065 /* Collect attributes up to ')'. */
1066 while (1)
1068 c = getc (infile);
1069 if (c == EOF)
1070 goto eof;
1071 if (c == ')')
1072 break;
1073 if (p - input_buffer > sizeof (input_buffer))
1074 abort ();
1075 *p++ = c;
1077 *p = 0;
1078 if (strstr (input_buffer, "noreturn"))
1079 g->flags |= DEFUN_noreturn;
1080 if (strstr (input_buffer, "const"))
1081 g->flags |= DEFUN_const;
1083 continue;
1086 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
1087 c = getc (infile);
1089 if (c == '"')
1090 c = read_c_string_or_comment (infile, 0, 0, 0);
1092 while (c != EOF && c != ',' && c != '/')
1093 c = getc (infile);
1094 if (c == ',')
1096 c = getc (infile);
1097 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
1098 c = getc (infile);
1099 while ((c >= 'a' && c <= 'z') || (c >= 'Z' && c <= 'Z'))
1100 c = getc (infile);
1101 if (c == ':')
1103 doc_keyword = 1;
1104 c = getc (infile);
1105 while (c == ' ' || c == '\n' || c == '\r' || c == '\t')
1106 c = getc (infile);
1110 if (c == '"'
1111 || (c == '/'
1112 && (c = getc (infile),
1113 ungetc (c, infile),
1114 c == '*')))
1116 int comment = c != '"';
1117 int saw_usage;
1119 printf ("\037%c%s\n", defvarflag ? 'V' : 'F', input_buffer);
1121 if (comment)
1122 getc (infile); /* Skip past `*'. */
1123 c = read_c_string_or_comment (infile, 1, comment, &saw_usage);
1125 /* If this is a defun, find the arguments and print them. If
1126 this function takes MANY or UNEVALLED args, then the C source
1127 won't give the names of the arguments, so we shouldn't bother
1128 trying to find them.
1130 Various doc-string styles:
1131 0: DEFUN (..., "DOC") (args) [!comment]
1132 1: DEFUN (..., /\* DOC *\/ (args)) [comment && !doc_keyword]
1133 2: DEFUN (..., doc: /\* DOC *\/) (args) [comment && doc_keyword]
1135 if (defunflag && maxargs != -1 && !saw_usage)
1137 char argbuf[1024], *p = argbuf;
1139 if (!comment || doc_keyword)
1140 while (c != ')')
1142 if (c < 0)
1143 goto eof;
1144 c = getc (infile);
1147 /* Skip into arguments. */
1148 while (c != '(')
1150 if (c < 0)
1151 goto eof;
1152 c = getc (infile);
1154 /* Copy arguments into ARGBUF. */
1155 *p++ = c;
1157 *p++ = c = getc (infile);
1158 while (c != ')');
1159 *p = '\0';
1160 /* Output them. */
1161 fputs ("\n\n", stdout);
1162 write_c_args (input_buffer, argbuf, minargs, maxargs);
1164 else if (defunflag && maxargs == -1 && !saw_usage)
1165 /* The DOC should provide the usage form. */
1166 fprintf (stderr, "Missing `usage' for function `%s'.\n",
1167 input_buffer);
1170 eof:
1171 fclose (infile);
1172 return 0;
1175 /* Read a file of Lisp code, compiled or interpreted.
1176 Looks for
1177 (defun NAME ARGS DOCSTRING ...)
1178 (defmacro NAME ARGS DOCSTRING ...)
1179 (defsubst NAME ARGS DOCSTRING ...)
1180 (autoload (quote NAME) FILE DOCSTRING ...)
1181 (defvar NAME VALUE DOCSTRING)
1182 (defconst NAME VALUE DOCSTRING)
1183 (fset (quote NAME) (make-byte-code ... DOCSTRING ...))
1184 (fset (quote NAME) #[... DOCSTRING ...])
1185 (defalias (quote NAME) #[... DOCSTRING ...])
1186 (custom-declare-variable (quote NAME) VALUE DOCSTRING ...)
1187 starting in column zero.
1188 (quote NAME) may appear as 'NAME as well.
1190 We also look for #@LENGTH CONTENTS^_ at the beginning of the line.
1191 When we find that, we save it for the following defining-form,
1192 and we use that instead of reading a doc string within that defining-form.
1194 For defvar, defconst, and fset we skip to the docstring with a kludgy
1195 formatting convention: all docstrings must appear on the same line as the
1196 initial open-paren (the one in column zero) and must contain a backslash
1197 and a newline immediately after the initial double-quote. No newlines
1198 must appear between the beginning of the form and the first double-quote.
1199 For defun, defmacro, and autoload, we know how to skip over the
1200 arglist, but the doc string must still have a backslash and newline
1201 immediately after the double quote.
1202 The only source files that must follow this convention are preloaded
1203 uncompiled ones like loaddefs.el; aside from that, it is always the .elc
1204 file that we should look at, and they are no problem because byte-compiler
1205 output follows this convention.
1206 The NAME and DOCSTRING are output.
1207 NAME is preceded by `F' for a function or `V' for a variable.
1208 An entry is output only if DOCSTRING has \ newline just after the opening ".
/* Skip spaces, tabs and line breaks on INFILE, leaving the stream
   positioned at the first other character (or at end of input).  */

static void
skip_white (FILE *infile)
{
  /* An int, so that EOF stays distinct from every character.  */
  int c;

  do
    c = getc (infile);
  while (c == ' ' || c == '\t' || c == '\n' || c == '\r');

  if (c != EOF)
    ungetc (c, infile);
}

/* Read a Lisp symbol from INFILE into BUFFER as a null-terminated
   string, skipping whitespace before and after it.  The symbol ends
   at whitespace, a parenthesis, or end of input; the terminating
   character is left unread.  A backslash makes the following
   character part of the symbol and is itself dropped.  If no symbol
   is found, a note is written to stderr and BUFFER is left empty.

   BUFFER's size is not known here; the caller must supply one large
   enough for the symbol.  */

static void
read_lisp_symbol (FILE *infile, char *buffer)
{
  int c;
  char *fillp = buffer;

  skip_white (infile);
  while (1)
    {
      c = getc (infile);
      if (c == '\\')
        {
          /* Store the escaped character at the current position; do
             not leave a gap in front of it.  */
          c = getc (infile);
          if (c == EOF)
            {
              *fillp = 0;
              break;
            }
          *fillp++ = c;
        }
      else if (c == ' ' || c == '\t' || c == '\n' || c == '\r'
               || c == '(' || c == ')' || c == EOF)
        {
          if (c != EOF)
            ungetc (c, infile);
          *fillp = 0;
          break;
        }
      else
        *fillp++ = c;
    }

  if (! buffer[0])
    {
      if (c == EOF)
        fprintf (stderr, "## expected a symbol, got end of file\n");
      else
        fprintf (stderr, "## expected a symbol, got '%c'\n", c);
    }

  skip_white (infile);
}
/* Read INFILE up to and including the next line break (or to end of
   input) and decide whether a doc string starts there.  Return 1 if
   the two characters just before the line break were a double quote
   followed by a backslash.  Otherwise return 0 after pushing the
   line break back onto INFILE (nothing is pushed back at end of
   input).  */
static int
search_lisp_doc_at_eol (FILE *infile)
{
  int cur = 0;          /* Character read most recently.  */
  int prev = 0;         /* The one before CUR.  */
  int prev2 = 0;        /* The one before PREV.  */

  /* Skip until the end of line; remember two previous chars.  */
  for (;;)
    {
      if (cur == '\n' || cur == '\r' || cur == EOF)
        break;
      prev2 = prev;
      prev = cur;
      cur = getc (infile);
    }

  /* A doc string is introduced by `"' and `\' right before the line
     break.  */
  if (prev2 == '"' && prev == '\\')
    return 1;

#ifdef DEBUG
  fprintf (stderr, "## non-docstring found\n");
#endif
  if (cur != EOF)
    ungetc (cur, infile);
  return 0;
}
1275 #define DEF_ELISP_FILE(fn) { #fn, sizeof(#fn) - 1 }
1277 static int
1278 scan_lisp_file (const char *filename, const char *mode)
1280 FILE *infile;
1281 register int c;
1282 char *saved_string = 0;
1283 /* These are the only files that are loaded uncompiled, and must
1284 follow the conventions of the doc strings expected by this
1285 function. These conventions are automatically followed by the
1286 byte compiler when it produces the .elc files. */
1287 static struct {
1288 const char *fn;
1289 size_t fl;
1290 } const uncompiled[] = {
1291 DEF_ELISP_FILE (loaddefs.el),
1292 DEF_ELISP_FILE (loadup.el),
1293 DEF_ELISP_FILE (charprop.el),
1294 DEF_ELISP_FILE (cp51932.el),
1295 DEF_ELISP_FILE (eucjp-ms.el)
1297 int i, match;
1298 size_t flen = strlen (filename);
1300 if (generate_globals)
1301 fatal ("scanning lisp file when -g specified", 0);
1302 if (flen > 3 && !strcmp (filename + flen - 3, ".el"))
1304 for (i = 0, match = 0; i < sizeof (uncompiled) / sizeof (uncompiled[0]);
1305 i++)
1307 if (uncompiled[i].fl <= flen
1308 && !strcmp (filename + flen - uncompiled[i].fl, uncompiled[i].fn)
1309 && (flen == uncompiled[i].fl
1310 || IS_SLASH (filename[flen - uncompiled[i].fl - 1])))
1312 match = 1;
1313 break;
1316 if (!match)
1317 fatal ("uncompiled lisp file %s is not supported", filename);
1320 infile = fopen (filename, mode);
1321 if (infile == NULL)
1323 perror (filename);
1324 return 0; /* No error. */
1327 c = '\n';
1328 while (!feof (infile))
1330 char buffer[BUFSIZ];
1331 char type;
1333 /* If not at end of line, skip till we get to one. */
1334 if (c != '\n' && c != '\r')
1336 c = getc (infile);
1337 continue;
1339 /* Skip the line break. */
1340 while (c == '\n' || c == '\r')
1341 c = getc (infile);
1342 /* Detect a dynamic doc string and save it for the next expression. */
1343 if (c == '#')
1345 c = getc (infile);
1346 if (c == '@')
1348 size_t length = 0;
1349 size_t i;
1351 /* Read the length. */
1352 while ((c = getc (infile),
1353 c >= '0' && c <= '9'))
1355 length *= 10;
1356 length += c - '0';
1359 if (length <= 1)
1360 fatal ("invalid dynamic doc string length", "");
1362 if (c != ' ')
1363 fatal ("space not found after dynamic doc string length", "");
1365 /* The next character is a space that is counted in the length
1366 but not part of the doc string.
1367 We already read it, so just ignore it. */
1368 length--;
1370 /* Read in the contents. */
1371 free (saved_string);
1372 saved_string = (char *) xmalloc (length);
1373 for (i = 0; i < length; i++)
1374 saved_string[i] = getc (infile);
1375 /* The last character is a ^_.
1376 That is needed in the .elc file
1377 but it is redundant in DOC. So get rid of it here. */
1378 saved_string[length - 1] = 0;
1379 /* Skip the line break. */
1380 while (c == '\n' || c == '\r')
1381 c = getc (infile);
1382 /* Skip the following line. */
1383 while (c != '\n' && c != '\r')
1384 c = getc (infile);
1386 continue;
1389 if (c != '(')
1390 continue;
1392 read_lisp_symbol (infile, buffer);
1394 if (! strcmp (buffer, "defun")
1395 || ! strcmp (buffer, "defmacro")
1396 || ! strcmp (buffer, "defsubst"))
1398 type = 'F';
1399 read_lisp_symbol (infile, buffer);
1401 /* Skip the arguments: either "nil" or a list in parens. */
1403 c = getc (infile);
1404 if (c == 'n') /* nil */
1406 if ((c = getc (infile)) != 'i'
1407 || (c = getc (infile)) != 'l')
1409 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1410 buffer, filename);
1411 continue;
1414 else if (c != '(')
1416 fprintf (stderr, "## unparsable arglist in %s (%s)\n",
1417 buffer, filename);
1418 continue;
1420 else
1421 while (c != ')')
1422 c = getc (infile);
1423 skip_white (infile);
1425 /* If the next three characters aren't `dquote bslash newline'
1426 then we're not reading a docstring. */
1428 if ((c = getc (infile)) != '"'
1429 || (c = getc (infile)) != '\\'
1430 || ((c = getc (infile)) != '\n' && c != '\r'))
1432 #ifdef DEBUG
1433 fprintf (stderr, "## non-docstring in %s (%s)\n",
1434 buffer, filename);
1435 #endif
1436 continue;
1440 /* defcustom can only occur in uncompiled Lisp files. */
1441 else if (! strcmp (buffer, "defvar")
1442 || ! strcmp (buffer, "defconst")
1443 || ! strcmp (buffer, "defcustom"))
1445 type = 'V';
1446 read_lisp_symbol (infile, buffer);
1448 if (saved_string == 0)
1449 if (!search_lisp_doc_at_eol (infile))
1450 continue;
1453 else if (! strcmp (buffer, "custom-declare-variable")
1454 || ! strcmp (buffer, "defvaralias")
1457 type = 'V';
1459 c = getc (infile);
1460 if (c == '\'')
1461 read_lisp_symbol (infile, buffer);
1462 else
1464 if (c != '(')
1466 fprintf (stderr,
1467 "## unparsable name in custom-declare-variable in %s\n",
1468 filename);
1469 continue;
1471 read_lisp_symbol (infile, buffer);
1472 if (strcmp (buffer, "quote"))
1474 fprintf (stderr,
1475 "## unparsable name in custom-declare-variable in %s\n",
1476 filename);
1477 continue;
1479 read_lisp_symbol (infile, buffer);
1480 c = getc (infile);
1481 if (c != ')')
1483 fprintf (stderr,
1484 "## unparsable quoted name in custom-declare-variable in %s\n",
1485 filename);
1486 continue;
1490 if (saved_string == 0)
1491 if (!search_lisp_doc_at_eol (infile))
1492 continue;
1495 else if (! strcmp (buffer, "fset") || ! strcmp (buffer, "defalias"))
1497 type = 'F';
1499 c = getc (infile);
1500 if (c == '\'')
1501 read_lisp_symbol (infile, buffer);
1502 else
1504 if (c != '(')
1506 fprintf (stderr, "## unparsable name in fset in %s\n",
1507 filename);
1508 continue;
1510 read_lisp_symbol (infile, buffer);
1511 if (strcmp (buffer, "quote"))
1513 fprintf (stderr, "## unparsable name in fset in %s\n",
1514 filename);
1515 continue;
1517 read_lisp_symbol (infile, buffer);
1518 c = getc (infile);
1519 if (c != ')')
1521 fprintf (stderr,
1522 "## unparsable quoted name in fset in %s\n",
1523 filename);
1524 continue;
1528 if (saved_string == 0)
1529 if (!search_lisp_doc_at_eol (infile))
1530 continue;
1533 else if (! strcmp (buffer, "autoload"))
1535 type = 'F';
1536 c = getc (infile);
1537 if (c == '\'')
1538 read_lisp_symbol (infile, buffer);
1539 else
1541 if (c != '(')
1543 fprintf (stderr, "## unparsable name in autoload in %s\n",
1544 filename);
1545 continue;
1547 read_lisp_symbol (infile, buffer);
1548 if (strcmp (buffer, "quote"))
1550 fprintf (stderr, "## unparsable name in autoload in %s\n",
1551 filename);
1552 continue;
1554 read_lisp_symbol (infile, buffer);
1555 c = getc (infile);
1556 if (c != ')')
1558 fprintf (stderr,
1559 "## unparsable quoted name in autoload in %s\n",
1560 filename);
1561 continue;
1564 skip_white (infile);
1565 if ((c = getc (infile)) != '\"')
1567 fprintf (stderr, "## autoload of %s unparsable (%s)\n",
1568 buffer, filename);
1569 continue;
1571 read_c_string_or_comment (infile, 0, 0, 0);
1573 if (saved_string == 0)
1574 if (!search_lisp_doc_at_eol (infile))
1575 continue;
1578 #ifdef DEBUG
1579 else if (! strcmp (buffer, "if")
1580 || ! strcmp (buffer, "byte-code"))
1581 continue;
1582 #endif
1584 else
1586 #ifdef DEBUG
1587 fprintf (stderr, "## unrecognized top-level form, %s (%s)\n",
1588 buffer, filename);
1589 #endif
1590 continue;
1593 /* At this point, we should either use the previous dynamic doc string in
1594 saved_string or gobble a doc string from the input file.
1595 In the latter case, the opening quote (and leading backslash-newline)
1596 have already been read. */
1598 printf ("\037%c%s\n", type, buffer);
1599 if (saved_string)
1601 fputs (saved_string, stdout);
1602 /* Don't use one dynamic doc string twice. */
1603 free (saved_string);
1604 saved_string = 0;
1606 else
1607 read_c_string_or_comment (infile, 1, 0, 0);
1609 fclose (infile);
1610 return 0;
1614 /* make-docfile.c ends here */