2 jargrep.c - main functions for jargrep utility
3 Copyright (C) 2002, 2003 Free Software Foundation
4 Copyright (C) 1999, 2000 Bryan Burns
5 Copyright (C) 2000 Cory Hollingsworth
7 Parts of this program are based on Bryan Burns' work with fastjar
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License
12 as published by the Free Software Foundation; either version 2
13 of the License, or (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25 /* Id: jargrep.c,v 1.5 2002/01/03 04:57:56 rodrigc Exp
28 Revision 1.5 2002/01/03 04:57:56 rodrigc
29 2001-01-02 Craig Rodrigues <rodrigc@gcc.gnu.org>
32 * configure.in (AC_CHECK_HEADERS): Check for stdlib.h.
33 * Makefile.am: Move grepjar to bin_PROGRAMS.
34 * config.h.in: Regenerated.
35 * Makefile.in: Regenerated.
36 * aclocal.m4: Regenerated.
37 * jargrep.c: Eliminate some signed/unsigned and default
38 uninitialized warnings. Use HAVE_STDLIB_H instead of
40 * jartool.c: Likewise.
41 * compress.c: Likewise.
43 Revision 1.4 2000/12/15 18:45:09 tromey
44 * jargrep.c: Include getopt.h if it exists.
46 * configure, config.h: Rebuilt.
47 * configure.in: Check for getopt.h.
49 Revision 1.3 2000/12/14 18:45:35 ghazi
52 * compress.c: Include stdlib.h and compress.h.
54 (report_str_error): Make static.
55 (ez_inflate_str): Delete unused variable. Add parens in if-stmt.
56 (hrd_inflate_str): Likewise.
58 * compress.h (init_compression, end_compression, init_inflation,
59 end_inflation): Prototype void arguments.
61 * dostime.c (rcsid): Delete.
63 * jargrep.c: Include ctype.h, stdlib.h, zlib.h and compress.h.
64 Make functions static. Cast ctype function argument to `unsigned
65 char'. Add parens in if-stmts. Constify.
66 (Usage): Change into a macro.
67 (jargrep): Remove unused parameter.
69 * jartool.c: Constify. Add parens in if-stmts. Align
70 signed/unsigned char pointers in functions calls using casts.
72 (list_jar): Fix printf format specifier.
73 (usage): Chop long string into bits. Reformat.
75 * pushback.c (rcsid): Delete.
77 Revision 1.2 2000/12/11 02:59:55 apbianco
78 2000-12-10 Robert Lipe <robertlipe@usa.net>
80 * jargrep.c (jargrep): Added null statement after case.
82 2000-12-10 Alexandre Petit-Bianco <apbianco@cygnus.com>
85 * Makefile.in: Rebuilt with `-i' and `--enable-foreign'.
87 (http://gcc.gnu.org/ml/gcc/2000-12/msg00294.html)
89 Revision 1.1 2000/12/09 03:08:23 apbianco
90 2000-12-08 Alexandre Petit-Bianco <apbianco@cygnus.com>
94 Revision 1.8 2000/09/13 14:02:02 cory
95 Reformatted some of the code to more closely match the layout of the original
98 Revision 1.7 2000/09/12 22:29:36 cory
99 Jargrep now seems to do what I want it to do. Performs properly on Linux x86,
100 will test some other platforms later.
110 #include <sys/types.h>
111 #include <sys/stat.h>
122 #include "pushback.h"
125 #include "compress.h"
129 void help(const char *name
);
131 #define Usage "Usage: %s [-bcinsVw] [--version|--help] <-e PATTERN | PATTERN> FILE ...\n"
134 Function name: opt_valid
135 arg: options Bitfield flag that contains the command line options of grepjar.
136 purpose: To guard against the occurrence of certain incompatible flags being used
138 returns: TRUE if options are valid, FALSE otherwise.
141 static int opt_valid(int options
) {
144 if((options
& JG_PRINT_COUNT
) &&
145 (options
& (JG_PRINT_BYTEOFFSET
| JG_PRINT_LINE_NUMBER
)))
155 Function name: create_regexp
156 args: regstr String containing the uncompiled regular expression. This may be the
157 expression as is passed in through argv.
158 options This is the flag containing the commandline options that have been
160 purpose: Handle the exception handling involved with setting up a new regular
162 returns: Newly allocated compiled regular expression ready to be used in a regexec call.
165 static regex_t
*create_regexp(const char *regstr
, int options
) {
171 if((exp
= (regex_t
*) malloc(sizeof(regex_t
))))
173 if((errcode
= regcomp(exp
, regstr
, (options
& JG_IGNORE_CASE
) ? REG_ICASE
: 0))) {
174 fprintf(stderr
, "regcomp of regex failed,\n");
175 if((errmsg
= (char *) malloc(msgsize
= regerror(errcode
, exp
, NULL
, 0) + 1))) {
176 regerror(errcode
, exp
, errmsg
, msgsize
);
177 fprintf(stderr
, "Error: %s\n", errmsg
);
183 fprintf(stderr
, "Malloc of errmsg failed.\n");
184 fprintf(stderr
, "Error: %s\n", strerror(errno
));
191 fprintf(stderr
, "Malloc of regex failed,\n");
192 fprintf(stderr
, "Error: %s\n", strerror(errno
));
200 Function name: check_sig
201 args: scratch Pointer to array of bytes containing signature.
202 pbfp Pointer to push back handle for jar file.
203 purpose: Verify that the signature is correct.
204 returns: 0, 1, or 2. 0 means we are ready to read embedded file information. 1 means
205 we have read beyond the embedded file list and can exit knowing we have read all the
206 relevant information. 2 means we still haven't reached the embedded file list and need to
207 do some more reading.
209 static int check_sig(ub1
*scratch
, pb_file
*pbfp
) {
213 signature
= UNPACK_UB4(scratch
, 0);
216 printf("signature is %x\n", signature
);
218 if(signature
== 0x08074b50){
220 printf("skipping data descriptor\n");
222 pb_read(pbfp
, scratch
, 12);
224 } else if(signature
== 0x02014b50){
226 printf("Central header reached.. we're all done!\n");
229 }else if(signature
!= 0x04034b50){
230 printf("Ick! %#x\n", signature
);
238 Function name: decd_siz
239 args csize Pointer to embedded file's compressed size.
240 usize Pointer to embedded file's uncompressed size.
241 fnlen Pointer to embedded file's file name length.
242 eflen Pointer to length of extra fields in jar file.
243 flags Pointer to bitmapped flags.
244 method Pointer to indicator of storage method of embedded file.
245 file_header Pointer to string containing the above values to be unpacked.
246 Purpose: Unpack the series of values from file_header.
249 static void decd_siz(ub4
*csize
, ub4
*usize
, ub2
*fnlen
, ub2
*eflen
, ub2
*flags
, ub2
*method
, ub1
*file_header
) {
250 *csize
= UNPACK_UB4(file_header
, LOC_CSIZE
);
252 printf("Compressed size is %u\n", *csize
);
255 *usize
= UNPACK_UB4(file_header
, LOC_USIZE
);
257 printf("Uncompressed size is %u\n", *usize
);
260 *fnlen
= UNPACK_UB2(file_header
, LOC_FNLEN
);
262 printf("Filename length is %hu\n", *fnlen
);
265 *eflen
= UNPACK_UB2(file_header
, LOC_EFLEN
);
267 printf("Extra field length is %hu\n", *eflen
);
270 *flags
= UNPACK_UB2(file_header
, LOC_EXTRA
);
272 printf("Flags are %#hx\n", *flags
);
275 *method
= UNPACK_UB2(file_header
, LOC_COMP
);
277 printf("Compression method is %#hx\n", *method
);
283 Function name: new_filename
284 args: pbf Pointer to push back file handle. Used for reading input file.
285 len Length of file name to be read.
286 purpose: Read in the embedded file name from jar file.
287 returns: Pointer to newly allocated string containing file name.
290 static char *new_filename(pb_file
*pbf
, ub4 len
) {
293 if(!(filename
= (char *) malloc(len
+ 1))) {
294 fprintf(stderr
, "Malloc failed of filename\n");
295 fprintf(stderr
, "Error: %s\n", strerror(errno
));
297 pb_read(pbf
, filename
, len
);
298 filename
[len
] = '\0';
301 printf("filename is %s\n", filename
);
308 Function name: read_string
309 args: pbf Pointer to push back file handle. Used for reading input file.
310 size Size of embedded file in bytes.
311 purpose: Create a string containing the contents of the embedded noncompressed file.
312 returns: Pointer to newly allocated string containing embedded file contents.
315 static char *read_string(pb_file
*pbf
, int size
) {
318 if((page
= (char *) malloc(size
+ 1))) {
319 pb_read(pbf
, page
, size
);
323 fprintf(stderr
, "Malloc of page buffer failed.\n");
324 fprintf(stderr
, "Error: %s\n", strerror(errno
));
332 Function name: extract_line
333 args: stream String containing the full contents of a file which is to be substringed
334 in order to provide line representing our grep output.
335 begin Index into stream where the regular expression first matches.
336 end Index into stream of the end of the match to the regular expression.
337 b Pointer to the index of what will be the beginning of the line when
338 string is returned. Used for -b option.
339 purpose: Create a string that can be printed by jargrep from the long string stream.
340 The matching line that is printed out by jargrep is generated by this function.
341 returns: Pointer to newly allocated string containing matched expression.
344 static char *extract_line(const char *stream
, regoff_t begin
, regoff_t end
, int *b
) {
349 for(*b
= begin
; *b
>= 0 && !iscntrl((unsigned char)stream
[*b
]); (*b
)--);
351 for(e
= end
; stream
[e
] == '\t' || !iscntrl((unsigned char)stream
[e
]); e
++);
353 if((retstr
= (char *) malloc(length
+ 1))) {
354 sprintf(retstr
, "%d:", *b
);
355 strncpy(retstr
, &(stream
[*b
]), length
);
356 retstr
[length
] = '\0';
359 fprintf(stderr
, "Malloc failed of output string.\n");
360 fprintf(stderr
, "Error: %s\n", strerror(errno
));
368 Function name: chk_wrd
369 args: exp Pointer to compiled POSIX style regular expression of search target.
370 str String known to contain at least one match of exp.
371 purpose: Verify that the occurrence of the regular expression in str occurs as a whole
372 word and not a substring of another word.
373 returns: TRUE if it is a word, FALSE if it is a substring.
376 static int chk_wrd(regex_t
*exp
, const char *str
) {
384 frnt_ok
= bck_ok
= FALSE
;
385 while(!wrd_fnd
&& !regexec(exp
, str2
, 1, &match
, 0)) {
386 if(!match
.rm_so
&& (str2
== str
)) frnt_ok
= TRUE
;
387 else if(!isalnum((unsigned char)str2
[match
.rm_so
- 1])
388 && str2
[match
.rm_so
- 1] != '_')
390 else frnt_ok
= FALSE
;
392 if(str2
[match
.rm_eo
] == '\0') bck_ok
= TRUE
;
393 else if(!isalnum((unsigned char)str2
[match
.rm_eo
])
394 && str2
[match
.rm_eo
] != '_')
398 wrd_fnd
= frnt_ok
&& bck_ok
;
399 str2
= &(str2
[match
.rm_eo
]);
406 Function name: prnt_mtchs
407 args: exp Pointer to compiled POSIX style regular expression of search target.
408 filename String containing the name of the embedded file which matches have
410 stream String containing the processed contents of the embedded jar file
411 represented with filename.
412 pmatch Array of regmatch_t matches into stream.
413 nl_offset Array of offsets of '\n' characters in stream. May be NULL if -n is
414 not set on command line.
415 num Number of matches in pmatch array.
416 lines Number of lines in file. Not set if -n is not set on command line.
417 options Bitwise flag containing flags set to represent the command line
419 purpose: Control output of jargrep. Output is controlled by which options have been
420 set at the command line.
423 static void prnt_mtchs(regex_t
*exp
, const char *filename
, const char *stream
, regmatch_t
*pmatch
, regmatch_t
*nl_offset
, int num
, int lines
, int options
) {
433 for(i
= 0; i
< num
; i
++) {
434 str
= extract_line(stream
, pmatch
[i
].rm_so
, pmatch
[i
].rm_eo
, &begin
);
435 if(begin
> o_begin
) {
436 if(!(options
& JG_WORD_EXPRESSIONS
) || chk_wrd(exp
, str
)) {
438 if(!(options
& JG_PRINT_COUNT
)) {
439 printf("%s:", filename
);
440 if(options
& JG_PRINT_LINE_NUMBER
) {
441 for(; j
< lines
&& nl_offset
[j
].rm_so
< begin
; j
++);
442 printf("%d:", j
+ 1);
444 if(options
& JG_PRINT_BYTEOFFSET
) printf("%d:", begin
);
452 if(options
& JG_PRINT_COUNT
) printf("%s:%d\n", filename
, ln_cnt
);
456 Function name: check_crc
457 args: pbf Pointer to pushback file pointer for jar file.
458 stream String containing the unmodified contents of the extracted file entry.
459 usize Size of file in bytes.
460 purpose: Verify that the CRC matches what is stored in the jar file.
463 static void check_crc(pb_file
*pbf
, const char *stream
, ub4 usize
) {
468 crc
= crc32(crc
, NULL
, 0);
469 crc
= crc32(crc
, (const unsigned char *)stream
, usize
);
470 if(pb_read(pbf
, scratch
, 16) != 16) {
474 if(UNPACK_UB4(scratch
, 0) != 0x08074b50) {
475 fprintf(stderr
, "Error! Missing data descriptor!\n");
478 lcrc
= UNPACK_UB4(scratch
, 4);
480 fprintf(stderr
, "Error! CRCs do not match! Got %x, expected %x\n",
487 Function name: mk_ascii
488 args: stream String that contains the contents of the extracted file entry.
490 purpose: Make certain that the contents of the file are ASCII, not binary. This
491 permits grepping of binary files as well by converting non ASCII and control characters
495 static void mk_ascii(char *stream
, size_t usize
) {
498 for(i
= 0; i
< usize
; i
++)
500 && (iscntrl((unsigned char)stream
[i
])
501 || (unsigned char) stream
[i
] >= 128))
506 Function name: fnd_match
507 args: exp Pointer to compiled POSIX style regular expression of search target.
508 str_stream String that contains the contents of the extracted file entry.
509 i Pointer to counter and index of matches.
510 purpose: Search str_stream for occurrences of the regular expression exp and create
512 returns: Pointer to newly allocated array of regmatch_t which gives indexes to start
513 and end of matches. NULL is returned upon no matches found.
516 static regmatch_t
*fnd_match(regex_t
*exp
, const char *str_stream
, int *i
) {
519 regmatch_t
*match_array
;
523 for(*i
= 0, regflag
= regexec(exp
, str_stream
, 1, &match
, 0); !regflag
;
524 regflag
= regexec(exp
, &(str_stream
[match
.rm_eo
]), 1, &match
, 0), (*i
)++)
526 if((tmp
= (regmatch_t
*)
527 realloc(match_array
, sizeof(regmatch_t
) * ((*i
) + 1))))
531 match
.rm_so
+= match_array
[(*i
) - 1].rm_eo
;
532 match
.rm_eo
+= match_array
[(*i
) - 1].rm_eo
;
534 match_array
[*i
] = match
;
537 fprintf(stderr
, "Realloc of match_array failed.\n");
538 fprintf(stderr
, "Error: %s\n", strerror(errno
));
547 Function name: cont_grep
548 args: exp Pointer to compiled POSIX style regular expression of search target.
549 nl_exp Pointer to compiled POSIX style regular expression of newlines. This
550 argument is NULL unless the -n option is used on the command line.
551 fd File descriptor of the jar file being grepped.
552 pbf Pointer to pushback file style file stream. This is for use with
553 the pushback.c file io functions.
554 options Bitwise flag containing flags set to represent the command line options.
555 purpose: This function handles single entries in an open jar file. The header is
556 read and then the embedded file is extracted and grepped.
557 returns: FALSE upon failure, TRUE otherwise.
560 static int cont_grep(regex_t
*exp
, regex_t
*nl_exp
, int fd
, pb_file
*pbf
, int options
) {
573 regmatch_t
*match_array
;
574 regmatch_t
*nl_offsets
=0;
576 if(pb_read(pbf
, (file_header
+ 4), 26) != 26) {
581 decd_siz(&csize
, &usize
, &fnlen
, &eflen
, &flags
, &method
, file_header
);
582 filename
= new_filename(pbf
, fnlen
);
583 lseek(fd
, eflen
, SEEK_CUR
);
584 if(filename
[fnlen
- 1] != '/') {
585 str_stream
= (method
== 8 || (flags
& 0x0008)) ?
586 (char *) inflate_string(pbf
, &csize
, &usize
) :
587 read_string(pbf
, csize
);
588 if(flags
& 0x008) check_crc(pbf
, str_stream
, usize
);
589 mk_ascii(str_stream
, usize
);
590 match_array
= fnd_match(exp
, str_stream
, &i
);
591 if((options
& JG_PRINT_LINE_NUMBER
) && i
)
592 nl_offsets
= fnd_match(nl_exp
, str_stream
, &j
);
593 prnt_mtchs(exp
, filename
, str_stream
, match_array
, nl_offsets
, i
, j
, options
);
594 if(match_array
) free(match_array
);
605 Function name: jargrep
606 args: exp Pointer to compiled POSIX style regular expression of search target.
607 nl_exp Pointer to compiled regular expression for newlines or NULL. Only set
608 if -n option is present at command line.
609 jarfile Filename of jar file to be searched.
610 options Bitwise flag containing flags set to represent the command line options.
611 purpose: Open jar file. Check signatures. When right signature is found go to deeper
615 static void jargrep(regex_t
*exp
, regex_t
*nl_exp
, const char *jarfile
, int options
){
621 if((fd
= open(jarfile
, O_RDONLY
)) == -1) {
622 if(!(options
& JG_SUPRESS_ERROR
))
623 fprintf(stderr
, "Error reading file '%s': %s\n", jarfile
, strerror(errno
));
629 if(pb_read(&pbf
, scratch
, 4) != 4) {
634 switch (check_sig(scratch
, &pbf
)) {
636 floop
= cont_grep(exp
, nl_exp
, fd
, &pbf
, options
);
642 /* fall through continue */
650 /* This is used to mark options with no short value. */
651 #define LONG_OPT(Num) ((Num) + 128)
653 #define OPT_HELP LONG_OPT (0)
655 static const struct option option_vec
[] =
657 { "help", no_argument
, NULL
, OPT_HELP
},
658 { "version", no_argument
, NULL
, 'V' },
659 { NULL
, no_argument
, NULL
, 0 }
664 args: argc number of incoming args.
665 argv array of strings.
666 purpose: Entry point of the program. Parse command line arguments and set options.
667 Set up regular expressions. Call grep routines for each file as input.
668 returns: 1 on error 0 on success.
671 int main(int argc
, char **argv
) {
677 regex_t
*nl_exp
= NULL
;
678 char *regexpstr
= NULL
;
680 while((c
= getopt_long(argc
, argv
, "bce:insVw",
681 option_vec
, NULL
)) != -1) {
684 options
|= JG_PRINT_BYTEOFFSET
;
687 options
|= JG_PRINT_COUNT
;
690 if(!(regexpstr
= (char *) malloc(strlen(optarg
) + 1))) {
691 fprintf(stderr
, "Malloc failure.\n");
692 fprintf(stderr
, "Error: %s\n", strerror(errno
));
695 strcpy(regexpstr
, optarg
);
698 options
|= JG_IGNORE_CASE
;
701 options
|= JG_PRINT_LINE_NUMBER
;
704 options
|= JG_SUPRESS_ERROR
;
707 options
|= JG_INVERT
;
713 options
|= JG_WORD_EXPRESSIONS
;
719 fprintf(stderr
, Usage
, argv
[0]);
724 if(((argc
- optind
) >= 2)) {
725 regexpstr
= argv
[optind
];
726 fileindex
= optind
+ 1;
729 fprintf(stderr
, "Invalid arguments.\n");
730 fprintf(stderr
, Usage
, argv
[0]);
734 else if((argc
- optind
) == 1) {
738 fprintf(stderr
, "Invalid arguments.\n");
739 fprintf(stderr
, Usage
, argv
[0]);
743 if(opt_valid(options
)) {
744 regexp
= create_regexp(regexpstr
, options
);
745 if(options
& JG_PRINT_LINE_NUMBER
) nl_exp
= create_regexp("\n", 0);
747 for(; fileindex
< argc
; fileindex
++)
748 jargrep(regexp
, nl_exp
, argv
[fileindex
], options
);
750 if(options
& JG_PRINT_LINE_NUMBER
) regfree(nl_exp
);
754 fprintf(stderr
, "Error: Invalid combination of options.\n");
760 void help(const char *filename
)
762 printf (Usage
, filename
);
765 Search files in a jar file for a pattern.\n\
767 -b print byte offset of match\n\
768 -c print number of matches\n\
769 -i compare case-insensitively\n\
770 -n print line number of each match\n\
771 -s suppress error messages\n\
772 -w force PATTERN to match only whole words\n\
773 -e PATTERN use PATTERN as regular expression\n\
774 -V|--version print version number and exit\n\
783 printf("grepjar (%s) %s\n\n", PACKAGE
, VERSION
);
784 printf("Copyright 1999, 2000, 2001 Bryan Burns\n");
785 printf("Copyright 2000 Cory Hollingsworth\n");
786 printf("Copyright 2002 Free Software Foundation\n");
788 This is free software; see the source for copying conditions. There is NO\n\
789 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n");