gtk-doc: refactor regexps and reparsing
[gtk-doc.git] / gtkdoc-common.pl.in
blobac6274475c29496c8e1af2192845a4599a243677
1 #!@PERL@ -w
2 # -*- cperl -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 2001 Damon Chaplin
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 # These are functions used by several of the gtk-doc Perl scripts.
24 # We'll move more of the common routines here eventually, though they need to
25 # stop using global variables first.
31 #############################################################################
32 # Function : UpdateFileIfChanged
33 # Description : Compares the old version of the file with the new version and
34 # if the file has changed it moves the new version into the old
35 # version's place. This is used so we only change files if
36 # needed, so we can do proper dependency tracking and we don't
37 # needlessly check files into version control systems that haven't
38 # changed.
39 # It returns 0 if the file hasn't changed, and 1 if it has.
40 # Arguments : $old_file - the pathname of the old file.
41 # $new_file - the pathname of the new version of the file.
42 # $make_backup - 1 if a backup of the old file should be kept.
43 # It will have the .bak suffix added to the file name.
44 #############################################################################
sub UpdateFileIfChanged {
    my ($old_file, $new_file, $make_backup) = @_;

    # Replaces $old_file with $new_file only when their contents differ.
    #
    #   $old_file    - pathname of the existing file (may not exist yet).
    #   $new_file    - pathname of the freshly generated file.
    #   $make_backup - true to keep the previous version as "$old_file.bak".
    #
    # Returns 1 if $old_file was created/replaced, 0 if the contents were
    # identical (in which case $new_file is deleted).
    # Dies if cmp cannot be run or reports trouble, or if a rename/unlink
    # fails.

    # If the old file doesn't exist we want this to default to 1 ("changed").
    my $exit_code = 1;

    if (-e $old_file) {
        # Use the list form of system() so that the file names are passed to
        # cmp verbatim instead of being re-parsed by a shell; names that
        # contain quotes, '$' or backquotes would otherwise break the command.
        # The '--' keeps a name starting with '-' from being read as an option.
        system ('cmp', '-s', '--', $old_file, $new_file);

        # $? is -1 when cmp could not be started; the low 7 bits are set
        # when it was killed by a signal. Neither is a valid comparison.
        if ($? == -1 || ($? & 127)) {
            die "Error running 'cmp $old_file $new_file'";
        }
        $exit_code = $? >> 8;
    }

    # cmp exits with 0 (same), 1 (different) or >1 (trouble).
    if ($exit_code > 1) {
        die "Error running 'cmp $old_file $new_file'";
    }

    if ($exit_code == 1) {
        # Files changed - replace the old version with the new version.
        if ($make_backup && -e $old_file) {
            rename ($old_file, "$old_file.bak")
                || die "Can't move $old_file to $old_file.bak: $!";
        }
        rename ($new_file, $old_file)
            || die "Can't move $new_file to $old_file: $!";

        return 1;
    } else {
        # Files are the same - delete the new version.
        unlink ("$new_file")
            || die "Can't delete file: $new_file: $!";

        return 0;
    }
}
84 #############################################################################
85 # Function : ParseStructDeclaration
86 # Description : This function takes a structure declaration and
87 # breaks it into individual type declarations.
88 # Arguments : $declaration - the declaration to parse
89 # $is_object - true if this is an object structure
90 # $output_function_params - true if full type is wanted for
91 # function pointer members
92 # $typefunc - function reference to apply to type
93 # $namefunc - function reference to apply to name
94 #############################################################################
96 sub ParseStructDeclaration {
# Returns a flat list built from pairs: the raw field name followed by a
# formatted declaration string for that field, in the order the fields are
# matched. Returns an empty list for forward declarations and when nothing
# but whitespace is left after stripping private parts and comments.
# $typefunc, when defined, is called as $typefunc->($type, "<type>$type</type>");
# $namefunc, when defined, is called as $namefunc->($name).
97 my ($declaration, $is_object, $output_function_params, $typefunc, $namefunc) = @_;
99 # For forward struct declarations just return an empty array.
100 if ($declaration =~ m/(?:struct|union)\s+\S+\s*;/msg) {
101 return ();
104 # Remove all private parts of the declaration
106 # For objects, assume private
# NOTE(review): as shown here nothing separates the opening-brace group from
# the public-marker alternative, so this substitution would only strip a
# marker (or match a '}') sitting directly after the brace. The stated intent
# ("assume private") suggests a non-greedy "anything" line is missing from
# this view of the pattern - confirm against the upstream file.
107 if ($is_object) {
108 $declaration =~ s!((?:struct|union)\s+\w*\s*\{)
110 (?:/\*\s*<\s*public\s*>\s*\*/|(?=\}))!$1!msgx;
# Strip each private/protected section, up to the next public marker or,
# failing that, up to a '}' at the start of a line.
113 # Assume end of declaration if line begins with '}'
114 $declaration =~ s!\n?[ \t]*/\*\s*<\s*(private|protected)\s*>\s*\*/.*?(?:/\*\s*<\s*public\s*>\s*\*/|(?=^\}))!!msgx;
116 # Remove all other comments;
117 $declaration =~ s@/\*([^*]+|\*(?!/))*\*/@ @g;
119 my @result = ();
121 if ($declaration =~ /^\s*$/) {
122 return @result;
# This scalar-context /g match is done for its side effect: it leaves
# pos($declaration) just past the opening brace, and the /g loop below
# resumes scanning from that position.
125 # Prime match after "struct/union {" declaration
126 if (!scalar($declaration =~ m/(?:struct|union)\s+\w*\s*\{/msg)) {
127 die "Declaration '$declaration' does not begin with struct/union [NAME] {\n";
130 #print "DEBUG: public fields in struct/union: $declaration\n";
# Each iteration takes the text up to the next ';', optionally spanning one
# brace-delimited group.
132 # Treat lines in sequence, allowing singly nested anonymous structs
133 # and unions.
134 while ($declaration =~ m/\s*([^{;]+(\{[^\}]*\}[^{;]+)?);/msg) {
135 my $line = $1;
# A chunk of the form "} [name]" is the end of the struct body.
137 last if $line =~ /^\s*\}\s*\w*\s*$/;
139 # FIXME: Just ignore nested structs and unions for now
140 next if $line =~ /{/;
# While a line starting with '#' is found, keep only the text that follows
# that directive line.
142 # ignore preprocessor directives
143 while ($line =~ /^#.*?\n\s*(.*)/msg) {
144 $line=$1;
147 last if $line =~ /^\s*\}\s*\w*\s*$/;
149 # Try to match structure members which are functions
150 if ($line =~ m/^
151 (const\s+|G_CONST_RETURN\s+|unsigned\s+|signed\s+|long\s+|short\s+)*(struct\s+|enum\s+)? # mod1
152 (\w+)\s* # type
153 (\**(?:\s*restrict)?)\s* # ptr1
154 (const\s+)? # mod2
155 (\**\s*) # ptr2
156 (const\s+)? # mod3
157 \(\s*\*\s*(\w+)\s*\)\s* # name
158 \(([^)]*)\)\s* # func_params
159 $/x) {
# NOTE: the first group is quantified with '*', so $1 holds only the last
# modifier matched (a repeated capture group keeps its final iteration).
161 my $mod1 = defined($1) ? $1 : "";
162 if (defined($2)) { $mod1 .= $2; }
163 my $type = $3;
164 my $ptr1 = $4;
165 my $mod2 = defined($5) ? $5 : "";
166 my $ptr2 = $6;
167 my $mod3 = defined($7) ? $7 : "";
168 my $name = $8;
169 my $func_params = $9;
170 my $ptype = defined $typefunc ? $typefunc->($type, "<type>$type</type>") : $type;
171 my $pname = defined $namefunc ? $namefunc->($name) : $name;
# The raw (untransformed) name goes first, then the formatted text.
173 push @result, $name;
# Either the full function-pointer declaration, or just "name ()" with a
# non-breaking space entity (&#160;) between the two.
175 if ($output_function_params) {
176 push @result, "$mod1$ptype$ptr1$mod2$ptr2$mod3 (*$pname) ($func_params)";
177 } else {
178 push @result, "$pname&#160;()";
182 # Try to match normal struct fields of comma-separated variables/
183 } elsif ($line =~ m/^
184 ((?:const\s+|volatile\s+|unsigned\s+|signed\s+|short\s+|long\s+)?)(struct\s+|enum\s+)? # mod1
185 (\w+)\s* # type
186 (\** \s* const\s+)? # mod2
187 (.*) # variables
188 $/x) {
190 my $mod1 = defined($1) ? $1 : "";
191 if (defined($2)) { $mod1 .= $2; }
192 my $type = $3;
193 my $ptype = defined $typefunc ? $typefunc->($type, "<type>$type</type>") : $type;
194 my $mod2 = defined($4) ? " " . $4 : "";
195 my $list = $5;
197 #print "'$mod1' '$type' '$mod2' '$list' \n";
# Plain spaces in the modifiers become non-breaking space entities.
199 $mod1 =~ s/ /&#160;/g;
200 $mod2 =~ s/ /&#160;/g;
# One result pair is produced per comma-separated declarator.
202 my @names = split /,/, $list;
203 for my $n (@names) {
204 # Each variable can have any number of '*' before the
205 # identifier, and be followed by any number of pairs of
206 # brackets or a bit field specifier.
207 # e.g. *foo, ***bar, *baz[12][23], foo : 25.
208 if ($n =~ m/^\s* (\**(?:\s*restrict\b)?) \s* (\w+) \s* (?: ((?:\[[^\]]*\]\s*)+) | (:\s*\d+)?) \s* $/x) {
209 my $ptrs = $1;
210 my $name = $2;
211 my $array = defined($3) ? $3 : "";
212 my $bits = defined($4) ? " $4" : "";
# Add a separating space when the pointer part does not end in '*'
# (i.e. it ends with the restrict keyword).
214 if ($ptrs && $ptrs !~ m/\*$/) { $ptrs .= " "; }
215 $array =~ s/ /&#160;/g;
216 $bits =~ s/ /&#160;/g;
# The raw name is pushed before $namefunc is applied to it.
218 push @result, $name;
219 if (defined $namefunc) {
220 $name = $namefunc->($name);
222 push @result, "$mod1$ptype$mod2&#160;$ptrs$name$array$bits;";
224 #print "***** Matched line: $mod1$ptype$mod2 $ptrs$name$array$bits\n";
# Unparsable declarators are reported with print (the currently selected
# output handle) and skipped; parsing continues.
225 } else {
226 print "WARNING: Couldn't parse struct field: $n\n";
230 } else {
231 print "WARNING: Cannot parse structure field: \"$line\"\n";
235 return @result;
239 #############################################################################
240 # Function : ParseEnumDeclaration
241 # Description : This function takes an enumeration declaration and
242 # breaks it into individual enum member declarations.
243 # Arguments : $declaration - the declaration to parse
244 #############################################################################
sub ParseEnumDeclaration {
    # Only the declaration is used. Callers may still pass further
    # arguments (historically an $is_object flag); they are ignored.
    my ($declaration) = @_;

    # Returns the list of enum member names in declaration order, or an
    # empty list for forward declarations and blank input.
    # Dies if no "enum [NAME] {" opener can be found.
    # Members that cannot be parsed are reported with warn() and skipped.

    # For forward enum declarations just return an empty array.
    if ($declaration =~ m/enum\s+\S+\s*;/msg) {
        return ();
    }

    # Remove comments;
    $declaration =~ s@/\*([^*]+|\*(?!/))*\*/@ @g;

    my @result = ();

    if ($declaration =~ /^\s*$/) {
        return @result;
    }

    # Remove parenthesized expressions (in macros like GTK_BLAH = BLAH(1,3))
    # to avoid getting confused by commas they might contain. Only the
    # innermost groups are removed on each pass and the substitution is
    # repeated until nothing is left, so nested parentheses such as
    # BLAH(INNER(1,2),3) are handled as well.
    1 while $declaration =~ s/\([^()]*\)//g;

    # Remove comma from comma - possible whitespace - closing brace sequence
    # since it is legal in GNU C and C99 to have a trailing comma but doesn't
    # result in an actual enum member
    $declaration =~ s/,(\s*})/$1/g;

    # Prime match after "typedef enum {" declaration. The scalar-context /g
    # match leaves pos($declaration) just past the opening brace, which is
    # where the member loop below starts scanning.
    if (!scalar($declaration =~ m/(typedef\s+)?enum\s*(\S+\s*)?\{/msg)) {
        die "Enum declaration '$declaration' does not begin with 'typedef enum {' or 'enum XXX {'\n";
    }

    # Treat lines in sequence: each chunk runs up to the next ',' or '}'.
    while ($declaration =~ m/\s*([^,\}]+)([,\}])/msg) {
        my $line = $1;
        my $terminator = $2;

        # ignore preprocessor directives
        while ($line =~ /^#.*?\n\s*(.*)/msg) {
            $line=$1;
        }

        if ($line =~ m/^(\w+)\s*(=.*)?$/msg) {
            push @result, $1;

        # Special case for GIOCondition, where the values are specified by
        # macros which expand to include the equal sign like '=1'.
        } elsif ($line =~ m/^(\w+)\s*GLIB_SYSDEF_POLL/msg) {
            push @result, $1;

        # Special case include of <gdk/gdkcursors.h>, just ignore it
        } elsif ($line =~ m/^#include/) {
            last;

        # Special case for #ifdef/#else/#endif, just ignore it
        } elsif ($line =~ m/^#(?:if|else|endif)/) {
            last;

        } else {
            warn "Cannot parse enumeration member \"$line\"";
        }

        # The closing brace ends the member list.
        last if $terminator eq '}';
    }

    return @result;
}
317 #############################################################################
318 # Function : ParseFunctionDeclaration
319 # Description : This function takes a function declaration and
320 # breaks it into individual parameter declarations.
321 # Arguments : $declaration - the declaration to parse
322 # $typefunc - function reference to apply to type
323 # $namefunc - function reference to apply to name
324 #############################################################################
sub ParseFunctionDeclaration {
    my ($declaration, $typefunc, $namefunc) = @_;

    # Splits a parameter list into a flat list of pairs: the parameter name
    # followed by its formatted declaration, in declaration order.
    #
    #   $declaration - the parameter list; every parameter must be followed
    #                  by a ',' or a newline to be recognised.
    #   $typefunc    - optional; called as $typefunc->($type, "<type>$type</type>")
    #                  to produce the text used for the type.
    #   $namefunc    - optional; called with the finished label (or with
    #                  "<type>void</type>" / "..." for those special cases)
    #                  to produce the second element of each pair.
    #
    # Parsing stops with a logged warning at the first parameter that cannot
    # be matched.
    #
    # NOTE(review): the warning paths read $symbol, %SymbolSourceFile and
    # %SymbolSourceLine, which are not set in this function - presumably they
    # are expected to be provided by the including script; verify.

    my @result = ();

    my ($param_num) = 0;
    while ($declaration ne "") {
        #print "$declaration";

        if ($declaration =~ s/^[\s,]+//) {
            # skip whitespace and commas
            next;

        } elsif ($declaration =~ s/^void\s*[,\n]//) {
            if ($param_num != 0) {
                # FIXME: what's the problem here?
                LogWarning ($SymbolSourceFile{$symbol},$SymbolSourceLine{$symbol}, "void used as parameter in function $symbol");
            }
            push @result, "void";
            my $xref = "<type>void</type>";
            my $label = defined $namefunc ? $namefunc->($xref) : $xref;
            push @result, $label;

        # The dots must be escaped: an unescaped '...' matches any three
        # characters, which turned unnamed parameters such as "int," into
        # varargs.
        } elsif ($declaration =~ s/^\.\.\.\s*[,\n]//) {
            push @result, "Varargs";
            my $label = defined $namefunc ? $namefunc->("...") : "...";
            push @result, $label;

        # allow alphanumerics, '_', '[' & ']' in param names
        # Try to match a standard parameter:
        #   $1 = modifier, $2 = type, $3 = pointer/qualifier part,
        #   $4 = name (optional), $5 = array suffix
        # NOTE: $1 is a quantified group, so when several modifiers precede
        # the type only the last one is captured.
        } elsif ($declaration =~ s/^\s*((?:G_CONST_RETURN|G_GNUC_UNUSED|unsigned long|unsigned short|signed long|signed short|unsigned|signed|long|short|volatile|const)\s+)*((?:struct\b|enum\b)?\s*\w+)\s*((?:(?:const\b|restrict\b)?\s*\*?\s*(?:const\b|restrict\b)?\s*)*)(\w+)?\s*((?:\[\S*\])*)\s*[,\n]//) {
            my $pre   = defined($1) ? $1 : "";
            my $type  = $2;
            my $ptr   = defined($3) ? $3 : "";
            my $name  = defined($4) ? $4 : "";
            my $array = defined($5) ? $5 : "";

            # Normalise whitespace in the captured pieces.
            $pre  =~ s/\s+/ /g;
            $type =~ s/\s+/ /g;
            $ptr  =~ s/\s+/ /g;
            $ptr  =~ s/\s+$//;
            # Keep a separating space when the pointer part ends in a
            # qualifier rather than a '*'.
            if ($ptr && $ptr !~ m/\*$/) { $ptr .= " "; }

            #print "$symbol: '$pre' '$type' '$ptr' '$name' '$array'\n";

            # "unsigned foo": what was matched as the type is really the
            # parameter name and the modifier is the type.
            if (($name eq "") && $pre =~ m/^((un)?signed .*)\s?/ ) {
                $name = $type;
                $type = "$1";
                $pre = "";
            }

            # Unnamed parameters get a positional placeholder name.
            if ($name eq "") {
                $name = "Param" . ($param_num + 1);
            }

            #print "$symbol: '$pre' '$type' '$ptr' '$name' '$array'\n";

            push @result, $name;
            my $xref = defined $typefunc ? $typefunc->($type, "<type>$type</type>") : $type;
            my $label = "$pre$xref $ptr$name$array";
            if (defined $namefunc) {
                $label = $namefunc->($label)
            }
            push @result, $label;

        # Try to match parameters which are functions:
        #   $1 = modifier, $2 = struct, $3 = return type, $4 = pointer stars,
        #   $5 = const, $6 = stars inside the parentheses, $7 = name,
        #   $8 = the function's own parameter list
        # The stars before the name are captured as $6; without that capture
        # the name and the parameter list ended up in the wrong variables.
        } elsif ($declaration =~ s/^(const\s+|G_CONST_RETURN\s+|unsigned\s+)*(struct\s+)?(\w+)\s*(\**)\s*(?:restrict\b)?\s*(const\s+)?\(\s*(\*+)\s*(\w+)\s*\)\s*\(([^)]*)\)\s*[,\n]//) {
            my $mod1 = defined($1) ? $1 : "";
            if (defined($2)) { $mod1 .= $2; }
            my $type = $3;
            my $ptr1 = $4;
            my $mod2 = defined($5) ? $5 : "";
            my $func_ptr = $6;
            my $name = $7;
            my $func_params = defined($8) ? $8 : "";

            if ($ptr1 && $ptr1 !~ m/\*$/) { $ptr1 .= " "; }

            #print "Type: [$mod1][$type][$ptr1][$mod2] ([$func_ptr][$name]) ($func_params)\n";

            push @result, $name;
            my $xref = defined $typefunc ? $typefunc->($type, "<type>$type</type>") : $type;
            my $label = "$mod1$xref$ptr1$mod2 ($func_ptr$name) ($func_params)";
            if (defined $namefunc) {
                $label = $namefunc->($label)
            }
            push @result, $label;
        } else {
            LogWarning ($SymbolSourceFile{$symbol},$SymbolSourceLine{$symbol},
                        "Can't parse args for function $symbol: $declaration");
            last;
        }
        $param_num++;
    }

    return @result;
}
433 #############################################################################
434 # Function : ParseMacroDeclaration
435 # Description : This function takes a macro declaration and
436 # breaks it into individual parameter declarations.
437 # Arguments : $declaration - the declaration to parse
438 # $namefunc - function reference to apply to name
439 #############################################################################
sub ParseMacroDeclaration {
    my ($declaration, $namefunc) = @_;

    # Extracts the parameter names of a function-like "#define NAME(...)".
    #
    #   $declaration - the macro definition text.
    #   $namefunc    - optional; called as $namefunc->($param) to produce the
    #                  second element of each pair.
    #
    # Returns a flat list of (parameter, label) pairs; the label is the
    # parameter itself when no $namefunc is given. Returns an empty list
    # when the text is not a function-like macro or has no parameters.

    my @result = ();

    if ($declaration =~ m/^\s*#\s*define\s+\w+\(([^\)]*)\)/) {
        my $params = $1;

        # Join parameter lists that were split with backslash-newline.
        $params =~ s/\\\n//g;

        # The loop variable is lexical so it neither touches a package
        # global nor becomes visible inside $namefunc.
        foreach my $param (split (/,/, $params)) {
            $param =~ s/^\s+//;
            $param =~ s/\s*$//;
            # Skip empty entries (e.g. "FOO( )").
            if ($param =~ m/\S/) {
                push @result, $param;
                push @result, defined $namefunc ? $namefunc->($param) : $param;
            }
        }
    }

    return @result;
}
464 #############################################################################
465 # Function : LogWarning
466 # Description : Log a warning in gcc style format
467 # Arguments : $file - the file the error comes from
468 # $line - line number for the wrong entry
469 # $message - description of the issue
470 #############################################################################
sub LogWarning {
    my ($file, $line, $message) = @_;

    # Emits "<file>:<line>: warning: <message>" followed by a newline on the
    # currently selected output handle. A missing file name is shown as
    # "unknown" and a missing line number as "0".
    unless (defined($file)) {
        $file = "unknown";
    }
    unless (defined($line)) {
        $line = "0";
    }

    print "$file:$line: warning: $message\n";
}
482 #############################################################################
483 # Function : CreateValidSGMLID
484 # Description : Creates a valid SGML 'id' from the given string.
485 # According to http://www.w3.org/TR/html4/types.html#type-id
486 # "ID and NAME tokens must begin with a letter ([A-Za-z]) and
487 # may be followed by any number of letters, digits ([0-9]),
488 # hyphens ("-"), underscores ("_"), colons (":"), and
489 # periods (".")."
491 # NOTE: When creating SGML IDS, we append ":CAPS" to all
492 # all-caps identifiers to prevent name clashes (SGML ids are
493 # case-insensitive). (It basically never is the case that
494 # mixed-case identifiers would collide.)
495 # Arguments : $id - the string to be converted into a valid SGML id.
496 #############################################################################
sub CreateValidSGMLID {
    my $id = shift;

    # Turns $id into an SGML 'id' string and returns it.

    # A lone underscore would be reduced to nothing below, so it gets a
    # fixed replacement.
    return "gettext-macro" if $id eq "_";

    $id =~ tr/_ /-/;     # underscores and spaces become hyphens
    $id =~ tr/,;//d;     # commas and semicolons are dropped
    $id =~ s/^-+//;      # no leading hyphens
    $id =~ s/::/-/g;     # a double colon collapses to one hyphen...
    $id =~ s/:/--/g;     # ...and a remaining single colon becomes two

    # Identifiers without any lower-case letter get ":CAPS" appended, unless
    # they already end in "-CAPS".
    # FIXME: there are some inconsistencies here, we have sgml.index files
    # containing e.g. TRUE--CAPS
    unless ($id =~ /[a-z]/ || $id =~ /-CAPS$/) {
        $id .= ":CAPS";
    }

    return $id;
}