scripts/add_parens_for_own_funcs.sh

   1 #!/bin/sh
   2 #
   3 # add_parens_for_own_funcs.sh
   4 #
   5 # This script is designed to fix inconsistencies in the use of
   6 # parentheses after function names in the manual pages.
   7 # It changes manual pages to add these parentheses.
   8 # The problem is how to determine what is a "function name".
   9 # The approach this script takes is the following:
  10 #
  11 #   For each manual page named in the command line that contains
  12 #           more than one line (i.e., skip man-page link files)
  13 #       Create a set of names taken from the .SH section of the
  14 #               page and from grepping all pages for names that
  15 #               have .so links to this page
  16 #       For each name obtained above
  17 #           If we can find something that looks like a prototype on
  18 #                   the page, then
  19 #               Try to substitute instances of that name on the page.
  20 #                   (instances are considered to be words formatted
  21 #                   using ^.[BI] or \f[BI]...\f[PR] -- this script
  22 #                   ignores unformatted instances of function names.)
  23 #           fi
  24 #       done
  25 #   done
  26 #
  27 # The rationale of the above is that the most likely function names
  28 # that appear on a page are those that the manual page is describing.
  29 # It doesn't fix everything, but it catches many instances.
  30 # The rest will have to be done manually.
  31 #
  32 # This script is rather verbose because it provides a computer-assisted
  33 # solution, rather than one that is fully automated.  When running it,
  34 # pipe the output through
  35 #
  36 #            ...  2>&1 | less
  37 #
  38 # and take a good look at the output.  In particular, you can scan
  39 # the output for *possible* problems by looking for the pattern: /^%%%/
  40 # The script's output should be enough to help you determine if the
  41 # problem is real or not.
  42 #
  43 # Suggested usage (in this case to fix pages in Section 2):
  44 #
  45 #     cd man2
  46 #     sh add_parens_for_own_funcs.sh *.2 2>&1 | tee changes.log | less
  47 #
  48 # Use the "-n" option for a dry run, in order to see what would be
  49 # done, without actually doing it.
  50 #
  51 # (And, yes, there are many ways that this script could probably be
  52 # made to work faster...)
  53 #
  54 ######################################################################
  55 #
  56 # (C) Copyright 2005 & 2013, Michael Kerrisk
  57 # This program is free software; you can redistribute it and/or
  58 # modify it under the terms of the GNU General Public License
  59 # as published by the Free Software Foundation; either version 2
  60 # of the License, or (at your option) any later version.
  61 #
  62 # This program is distributed in the hope that it will be useful,
  63 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  64 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  65 # GNU General Public License for more details
  66 # (http://www.gnu.org/licenses/gpl-2.0.html).
  67 #
  68 #
  69 #
  70
  71 file_base="tmp.$(basename $0)"
  72
  73 work_dst_file="$file_base.dst"
  74 work_src_file="$file_base.src"
  75
  76 matches_for_all_names="$file_base.all_match"
  77 matches_for_this_name="$file_base.this_match"
  78
  79 all_files="$work_dst_file $work_src_file $matches_for_all_names \
  80            $matches_for_this_name"
  81
  82 rm -f $all_files
  83
  84 # Command-line option processing
  85
  86 really_do_it=1
  87 while getopts "n" optname; do
  88     case "$optname" in
  89     n)  really_do_it=0;
  90         ;;
  91     *)  echo "Unknown option: $OPTARG"
  92         exit 1
  93         ;;
  94     esac
  95 done
  96
  97 shift $(( $OPTIND - 1 ))
  98
  99 # Only process files with > 1 line -- single-line files are link files
 100
 101 for page in $(wc "$@" 2> /dev/null | awk '$1 > 1 {print $4}'| \
 102     grep -v '^total'); do
 103
 104     echo ">>>>>>>>>>>>>>>>>>>>>>>>>" $page "<<<<<<<<<<<<<<<<<<<<<<<<<"
 105     echo ">>>>>>>>>>>>>>>>>>>>>>>>>" $page "<<<<<<<<<<<<<<<<<<<<<<<<<" 1>&2
 106
 107     # Extract names that follow the ".SH NAME" directive -- these will
 108     # be our guesses about function names to look for
 109
 110     sh_nlist=$(cat $page | \
 111         awk 'BEGIN { p = 0 }
 112              /^\.SH NAME/     { p = NR }
 113              /^.SH/ && NR > p { p = 0 }     # Stop at the next .SH directive
 114              p > 0 && NR > p  { print $0 }  # These are the lines between
 115                                             # the two .SH directives
 116             ')
 117     sh_nlist=$(echo $sh_nlist | sed -e 's/ *\\-.*//' -e 's/, */ /g')
 118     echo "### .SH name list:" $sh_nlist
 119
 120     # Some pages like msgop.2 don't actually list the function names in
 121     # the .SH section -- but we can try using link pages to give us
 122     # another guess at the right function names to look for
 123
 124     so_nlist=$(grep -l "^\\.so.*/$(echo $page| \
 125              sed -e 's/\.[1-8]$//')\\." $* | \
 126              sed -e 's/\.[1-8]$//g')
 127
 128     echo "### .so name list:" $so_nlist
 129
 130     # Combine the two lists, eliminate duplicates
 131
 132     nlist=$(echo $sh_nlist $so_nlist | tr ' ' '\012' | sort -u)
 133
 134     maybechanged=0
 135
 136     cp $page $work_dst_file
 137     rm -f $matches_for_all_names; # touch $matches_for_all_names
 138
 139     for rname in $nlist; do     # try each name from out list for this page
 140
 141         # A very few names in .SH sections contain regexp characters!
 142
 143         name=$(echo $rname | sed -e 's/\*/\\*/g' -e 's/\./\\./g' \
 144                 -e 's/\[/\\[/g' -e 's/\+/\\+/g')
 145
 146         echo "########## trying $rname ##########"
 147
 148         rm -f $matches_for_this_name
 149
 150         grep "^.BR* $name *$" $page | \
 151             >> $matches_for_this_name
 152         grep "^.BR $name [^(\"]$" $page | \
 153             >> $matches_for_this_name
 154         grep '\\fB'"$name"'\\f[PR][ .,;:]' $page | \
 155             >> $matches_for_this_name
 156         grep '\\fB'"$name"'\\f[PR]$' $page | \
 157             >> $matches_for_this_name
 158
 159         cat $matches_for_this_name | sed -e 's/^/### MATCH: /'
 160         cat $matches_for_this_name >> $matches_for_all_names
 161
 162         # Only process a page if we can see something that looks
 163         # like a function prototype for this name in the page
 164
 165         if grep -q "$name *(" $page || \
 166             grep -q "$name\\\\f.[\\ ]*(" $page; then
 167
 168             # '.B name$'
 169             # '.BR name [^("]*$
 170             # (The use of [^"] in the above eliminates lines
 171             # like: .BR func " and " func
 172             # Those lines better be done manually.)
 173             cp $work_dst_file $work_src_file
 174             cat $work_src_file | \
 175                 sed \
 176                 -e "s/^.BR* $name *\$/.BR $name ()/" \
 177                 -e "/^.BR *$name [^(\"]*\$/s/^.BR *$name /.BR $name ()/" \
 178                 > $work_dst_file
 179
 180             # '\fBname\fP[ .,;:]'
 181             # '\fBname\fP$'
 182             cp $work_dst_file $work_src_file
 183             cat $work_src_file | \
 184                 sed \
 185                 -e 's/\\fB'$name'\\fP /\\fB'$name'\\fP() /g' \
 186                 -e 's/\\fB'$name'\\fP\./\\fB'$name'\\fP()./g' \
 187                 -e 's/\\fB'$name'\\fP,/\\fB'$name'\\fP(),/g' \
 188                 -e 's/\\fB'$name'\\fP;/\\fB'$name'\\fP();/g' \
 189                 -e 's/\\fB'$name'\\fP:/\\fB'$name'\\fP():/g' \
 190                 -e 's/\\fB'$name'\\fP$/\\fB'$name'\\fP()/g' \
 191                 > $work_dst_file
 192
 193             # '\fBname\fR[ .,;:]'
 194             # '\fBname\fR$'
 195             cp $work_dst_file $work_src_file
 196             cat $work_src_file | \
 197                 sed \
 198                 -e 's/\\fB'$name'\\fR /\\fB'$name'\\fR() /g' \
 199                 -e 's/\\fB'$name'\\fR\./\\fB'$name'\\fR()./g' \
 200                 -e 's/\\fB'$name'\\fR,/\\fB'$name'\\fR(),/g' \
 201                 -e 's/\\fB'$name'\\fR;/\\fB'$name'\\fR();/g' \
 202                 -e 's/\\fB'$name'\\fR:/\\fB'$name'\\fR():/g' \
 203                 -e 's/\\fB'$name'\\fR$/\\fB'$name'\\fR()/g' \
 204                 > $work_dst_file
 205
 206             maybechanged=1
 207         else
 208             echo "%%%%%%%%%% WARNING: NO PROTOTYPE MATCHES FOR: $name"
 209         fi
 210     done
 211
 212     # If the file was changed, then:
 213     # show "diff -U" output to user;
 214     # and count number of changed lines and compare it with what
 215     # we expected, displaying a warning if it wasn't what was expected
 216
 217     if test $maybechanged -ne 0 && ! cmp -s $page $work_dst_file; then
 218         diff -u $page $work_dst_file
 219
 220         made_matches=$(diff -U 0 $page $work_dst_file | grep '^\+[^+]' | \
 221                 wc -l | awk '{print $1}')
 222
 223         # The following line makes the changes -- comment it out if you
 224         # just want to do a dry run to see what changes would be made.
 225
 226         if test $really_do_it -ne 0; then
 227             cat $work_dst_file > $page
 228         fi
 229
 230     else
 231         echo "### NOTHING CHANGED"
 232         made_matches=0
 233     fi
 234
 235     min_match=$(cat $matches_for_all_names | \
 236             sort -u | wc -l | awk '{print $1}')
 237
 238     echo "### Expected matches >= $min_match"
 239     echo "### Made matches $made_matches"
 240
 241     if test $made_matches -lt $min_match; then
 242         echo "%%%%%%%%%% WARNING: NOT ENOUGH MATCHES: " \
 243             "$made_matches < $min_match"
 244     fi
 245
 246 done
 247
 248 # clean up
 249
 250 rm -f $all_files
 251 exit 0