2 jargrep.c - main functions for jargrep utility
3 Copyright (C) 2002 Free Software Foundation
4 Copyright (C) 1999, 2000 Bryan Burns
5 Copyright (C) 2000 Cory Hollingsworth
7 Parts of this program are based on Bryan Burns' work with fastjar
10 This program is free software; you can redistribute it and/or
11 modify it under the terms of the GNU General Public License
12 as published by the Free Software Foundation; either version 2
13 of the License, or (at your option) any later version.
15 This program is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with this program; if not, write to the Free Software
22 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
25 /* Id: jargrep.c,v 1.5 2002/01/03 04:57:56 rodrigc Exp
28 Revision 1.5 2002/01/03 04:57:56 rodrigc
29 2001-01-02 Craig Rodrigues <rodrigc@gcc.gnu.org>
32 * configure.in (AC_CHECK_HEADERS): Check for stdlib.h.
33 * Makefile.am: Move grepjar to bin_PROGRAMS.
34 * config.h.in: Regenerated.
35 * Makefile.in: Regenerated.
36 * aclocal.m4: Regenerated.
37 * jargrep.c: Eliminate some signed/unsigned and default
38 uninitialized warnings. Use HAVE_STDLIB_H instead of
40 * jartool.c: Likewise.
41 * compress.c: Likewise.
43 Revision 1.4 2000/12/15 18:45:09 tromey
44 * jargrep.c: Include getopt.h if it exists.
46 * configure, config.h: Rebuilt.
47 * configure.in: Check for getopt.h.
49 Revision 1.3 2000/12/14 18:45:35 ghazi
52 * compress.c: Include stdlib.h and compress.h.
54 (report_str_error): Make static.
55 (ez_inflate_str): Delete unused variable. Add parens in if-stmt.
56 (hrd_inflate_str): Likewise.
58 * compress.h (init_compression, end_compression, init_inflation,
59 end_inflation): Prototype void arguments.
61 * dostime.c (rcsid): Delete.
63 * jargrep.c: Include ctype.h, stdlib.h, zlib.h and compress.h.
64 Make functions static. Cast ctype function argument to `unsigned
65 char'. Add parens in if-stmts. Constify.
66 (Usage): Change into a macro.
67 (jargrep): Remove unused parameter.
69 * jartool.c: Constify. Add parens in if-stmts. Align
70 signed/unsigned char pointers in functions calls using casts.
72 (list_jar): Fix printf format specifier.
73 (usage): Chop long string into bits. Reformat.
75 * pushback.c (rcsid): Delete.
77 Revision 1.2 2000/12/11 02:59:55 apbianco
78 2000-12-10 Robert Lipe <robertlipe@usa.net>
80 * jargrep.c (jargrep): Added null statement after case.
82 2000-12-10 Alexandre Petit-Bianco <apbianco@cygnus.com>
85 * Makefile.in: Rebuilt with `-i' and `--enable-foreign'.
87 (http://gcc.gnu.org/ml/gcc/2000-12/msg00294.html)
89 Revision 1.1 2000/12/09 03:08:23 apbianco
90 2000-12-08 Alexandre Petit-Bianco <apbianco@cygnus.com>
94 Revision 1.8 2000/09/13 14:02:02 cory
95 Reformatted some of the code to more closely match the layout of the original
98 Revision 1.7 2000/09/12 22:29:36 cory
99 Jargrep now seems to do what I want it to do. Performs properly on Linux x86,
100 will test some other platforms later.
111 #include <sys/types.h>
112 #include <sys/stat.h>
120 #include "pushback.h"
123 #include "compress.h"
127 void help(const char *name
);
129 #define Usage "Usage: %s [-bcinsw] <-e PATTERN | PATTERN> FILE ...\n"
132 Function name: opt_valid
133 arg: options Bitfield flag that contains the command line options of grepjar.
134 purpose: To guard against the occurrence of certain incompatible flags being used
136 returns: TRUE if options are valid, FALSE otherwise.
139 static int opt_valid(int options
) {
142 if((options
& JG_PRINT_COUNT
) &&
143 (options
& (JG_PRINT_BYTEOFFSET
| JG_PRINT_LINE_NUMBER
)))
153 Function name: create_regexp
154 args: regstr String containing the uncompiled regular expression. This may be the
155 expression as is passed in through argv.
156 options This is the flag containing the commandline options that have been
158 purpose: Take care of the error handling involved with setting up a new regular
160 returns: Newly allocated compiled regular expression ready to be used in a regexec call.
163 static regex_t
*create_regexp(const char *regstr
, int options
) {
169 if((exp
= (regex_t
*) malloc(sizeof(regex_t
))))
171 if((errcode
= regcomp(exp
, regstr
, (options
& JG_IGNORE_CASE
) ? REG_ICASE
: 0))) {
172 fprintf(stderr
, "regcomp of regex failed,\n");
173 if((errmsg
= (char *) malloc(msgsize
= regerror(errcode
, exp
, NULL
, 0) + 1))) {
174 regerror(errcode
, exp
, errmsg
, msgsize
);
175 fprintf(stderr
, "Error: %s\n", errmsg
);
181 fprintf(stderr
, "Malloc of errmsg failed.\n");
182 fprintf(stderr
, "Error: %s\n", strerror(errno
));
189 fprintf(stderr
, "Malloc of regex failed,\n");
190 fprintf(stderr
, "Error: %s\n", strerror(errno
));
198 Function name: check_sig
199 args: scratch Pointer to array of bytes containing signature.
200 pbfp Pointer to push back handle for jar file.
201 purpose: Identify the record signature held in scratch.
202 returns: 0, 1, or 2. 0 means we are ready to read embedded file information. 1 means
203 we have read beyond the embedded file list and can exit knowing we have read all the
204 relevant information. 2 means we still haven't reached the embedded file list and need to
205 do some more reading.
207 static int check_sig(ub1
*scratch
, pb_file
*pbfp
) {
211 signature
= UNPACK_UB4(scratch
, 0);
214 printf("signature is %x\n", signature
);
216 if(signature
== 0x08074b50){
218 printf("skipping data descriptor\n");
220 pb_read(pbfp
, scratch
, 12);
222 } else if(signature
== 0x02014b50){
224 printf("Central header reached.. we're all done!\n");
227 }else if(signature
!= 0x04034b50){
228 printf("Ick! %#x\n", signature
);
236 Function name: decd_siz
237 args csize Pointer to embedded file's compressed size.
238 usize Pointer to embedded file's uncompressed size.
239 fnlen Pointer to embedded file's file name length.
240 eflen Pointer to length of extra fields in jar file.
241 flags Pointer to bitmapped flags.
242 method Pointer to indicator of storage method of embedded file.
243 file_header Pointer to string containing the above values to be unpacked.
244 Purpose: Unpack the series of values from file_header.
247 static void decd_siz(ub4
*csize
, ub4
*usize
, ub2
*fnlen
, ub2
*eflen
, ub2
*flags
, ub2
*method
, ub1
*file_header
) {
248 *csize
= UNPACK_UB4(file_header
, LOC_CSIZE
);
250 printf("Compressed size is %u\n", *csize
);
253 *usize
= UNPACK_UB4(file_header
, LOC_USIZE
);
255 printf("Uncompressed size is %u\n", *usize
);
258 *fnlen
= UNPACK_UB2(file_header
, LOC_FNLEN
);
260 printf("Filename length is %hu\n", *fnlen
);
263 *eflen
= UNPACK_UB2(file_header
, LOC_EFLEN
);
265 printf("Extra field length is %hu\n", *eflen
);
268 *flags
= UNPACK_UB2(file_header
, LOC_EXTRA
);
270 printf("Flags are %#hx\n", *flags
);
273 *method
= UNPACK_UB2(file_header
, LOC_COMP
);
275 printf("Compression method is %#hx\n", *method
);
281 Function name: new_filename
282 args: pbf Pointer to push back file handle. Used for reading input file.
283 len Length of file name to be read.
284 purpose: Read in the embedded file name from jar file.
285 returns: Pointer to newly allocated string containing file name.
288 static char *new_filename(pb_file
*pbf
, ub4 len
) {
291 if(!(filename
= (char *) malloc(len
+ 1))) {
292 fprintf(stderr
, "Malloc failed of filename\n");
293 fprintf(stderr
, "Error: %s\n", strerror(errno
));
295 pb_read(pbf
, filename
, len
);
296 filename
[len
] = '\0';
299 printf("filename is %s\n", filename
);
306 Function name: read_string
307 args: pbf Pointer to push back file handle. Used for reading input file.
308 size Size of embedded file in bytes.
309 purpose: Create a string containing the contents of the embedded noncompressed file.
310 returns: Pointer to newly allocated string containing embedded file contents.
313 static char *read_string(pb_file
*pbf
, int size
) {
316 if((page
= (char *) malloc(size
+ 1))) {
317 pb_read(pbf
, page
, size
);
321 fprintf(stderr
, "Malloc of page buffer failed.\n");
322 fprintf(stderr
, "Error: %s\n", strerror(errno
));
/*
Function name: extract_line
args:	stream	String containing the full contents of a file which is to be
				substringed in order to provide the line representing our
				grep output.
		begin	Index into stream at which the regular expression match starts.
		end		Index into stream just past the end of the match.
		b		Out parameter.  Receives the index into stream of the first
				character of the returned line.  Used for -b option.
purpose: Create a string that can be printed by jargrep from the long string
stream.  The line is the run of tabs and non-control characters surrounding
the match; any other control character (or the terminating NUL) ends it.
returns: Pointer to newly allocated, NUL-terminated copy of the matching line.
Exits with status 1 if the copy cannot be allocated.
*/

static char *extract_line(const char *stream, regoff_t begin, regoff_t end, int *b) {
	regoff_t first;
	regoff_t last;
	size_t length;
	char *retstr;

	/* Scan backwards to just before the line.  Tabs belong to the line here
	   exactly as they do in the forward scan below, so a line is not cut
	   short at the last tab preceding the match. */
	for(first = begin;
	    first >= 0 && (stream[first] == '\t' || !iscntrl((unsigned char)stream[first]));
	    first--);
	first++;

	/* Scan forwards to the first character that is not part of the line. */
	for(last = end;
	    stream[last] == '\t' || !iscntrl((unsigned char)stream[last]);
	    last++);

	/* An empty match sitting on a line break leaves last < first. */
	length = (last > first) ? (size_t)(last - first) : 0;
	*b = (int) first;

	if(!(retstr = (char *) malloc(length + 1))) {
		fprintf(stderr, "Malloc failed of output string.\n");
		fprintf(stderr, "Error: %s\n", strerror(errno));
		exit(1);
	}
	if(length) memcpy(retstr, stream + first, length);
	retstr[length] = '\0';

	return retstr;
}
/*
Function name: chk_wrd
args:	exp		Pointer to compiled POSIX style regular expression of search target.
		str		String known to contain at least one match of exp.
purpose: Verify that an occurrence of the regular expression in str occurs as
a whole word and not as a substring of another word.  A word boundary is the
start or end of str, or any character that is neither alphanumeric nor '_'.
Zero-length matches never count as a word and are stepped over.
returns: Nonzero (TRUE) if some match is a whole word, zero (FALSE) if every
match is a substring of a larger word.
*/

static int chk_wrd(regex_t *exp, const char *str) {
	const char *str2 = str;
	regmatch_t match;
	int frnt_ok;
	int bck_ok;
	int wrd_fnd = 0;

	while(!wrd_fnd && !regexec(exp, str2, 1, &match, 0)) {
		const char *first = str2 + match.rm_so;
		const char *past = str2 + match.rm_eo;

		if(first == past) {
			/* Empty match: resuming at the same place would never
			   terminate, so move one character on or stop at the end. */
			if(*past == '\0') break;
			str2 = past + 1;
			continue;
		}

		/* first[-1] is only read when first > str, so it stays inside str
		   even after str2 has been advanced past an earlier match. */
		frnt_ok = (first == str)
			|| (!isalnum((unsigned char)first[-1]) && first[-1] != '_');
		bck_ok = (*past == '\0')
			|| (!isalnum((unsigned char)*past) && *past != '_');

		wrd_fnd = frnt_ok && bck_ok;
		str2 = past;
	}

	return wrd_fnd;
}
405 Function name: prnt_mtchs
406 args: exp Pointer to compiled POSIX style regular expression of search target.
407 filename String containing the name of the embedded file which matches have
409 stream String containing the processed contents of the embedded jar file
410 represented by filename.
411 pmatch Array of regmatch_t matches into stream.
412 nl_offset Array of offsets of '\n' characters in stream. May be NULL if -n is
413 not set on command line.
414 num Number of matches in pmatch array.
415 lines Number of lines in file. Not set if -n is not set on command line.
416 options Bitwise flag containing flags set to represent the command line
418 purpose: Control output of jargrep. Output is controlled by which options have been
419 set at the command line.
422 static void prnt_mtchs(regex_t
*exp
, const char *filename
, const char *stream
, regmatch_t
*pmatch
, regmatch_t
*nl_offset
, int num
, int lines
, int options
) {
432 for(i
= 0; i
< num
; i
++) {
433 str
= extract_line(stream
, pmatch
[i
].rm_so
, pmatch
[i
].rm_eo
, &begin
);
434 if(begin
> o_begin
) {
435 if(!(options
& JG_WORD_EXPRESSIONS
) || chk_wrd(exp
, str
)) {
437 if(!(options
& JG_PRINT_COUNT
)) {
438 printf("%s:", filename
);
439 if(options
& JG_PRINT_LINE_NUMBER
) {
440 for(; j
< lines
&& nl_offset
[j
].rm_so
< begin
; j
++);
441 printf("%d:", j
+ 1);
443 if(options
& JG_PRINT_BYTEOFFSET
) printf("%d:", begin
);
451 if(options
& JG_PRINT_COUNT
) printf("%s:%d\n", filename
, ln_cnt
);
455 Function name: check_crc
456 args: pbf Pointer to pushback file pointer for jar file.
457 stream String containing the unmodified contents of the extracted file entry.
458 usize Size of file in bytes.
459 purpose: Verify that the CRC matches the one stored in the jar file.
462 static void check_crc(pb_file
*pbf
, const char *stream
, ub4 usize
) {
467 crc
= crc32(crc
, NULL
, 0);
468 crc
= crc32(crc
, (const unsigned char *)stream
, usize
);
469 if(pb_read(pbf
, scratch
, 16) != 16) {
473 if(UNPACK_UB4(scratch
, 0) != 0x08074b50) {
474 fprintf(stderr
, "Error! Missing data descriptor!\n");
477 lcrc
= UNPACK_UB4(scratch
, 4);
479 fprintf(stderr
, "Error! CRCs do not match! Got %x, expected %x\n",
486 Function name: mk_ascii
487 args: stream String that contains the contents of the extracted file entry.
489 purpose: Make certain that the contents of the file are ASCII, not binary. This
490 permits grepping of binary files as well by converting non ASCII and control characters
494 static void mk_ascii(char *stream
, size_t usize
) {
497 for(i
= 0; i
< usize
; i
++)
499 && (iscntrl((unsigned char)stream
[i
])
500 || (unsigned char) stream
[i
] >= 128))
/*
Function name: fnd_match
args:	exp			Pointer to compiled POSIX style regular expression of search target.
		str_stream	String that contains the contents of the extracted file entry.
		i			Out parameter.  Receives the number of entries in the
					returned array.
purpose: Search str_stream for occurrences of the regular expression exp and
create an array describing them.  Each search resumes where the previous match
ended.  Zero-length matches are not recorded; the search simply steps one
character past them so that it always makes progress.
returns: Pointer to newly allocated array of regmatch_t whose rm_so/rm_eo are
offsets from the start of str_stream.  NULL is returned (and *i set to 0) when
no matches are found.  Exits with status 1 if the array cannot be grown.
*/

static regmatch_t *fnd_match(regex_t *exp, const char *str_stream, int *i) {
	regmatch_t *match_array = NULL;
	regmatch_t *grown;
	regmatch_t match;
	regoff_t base = 0;	/* offset in str_stream where the current search starts */

	*i = 0;
	while(!regexec(exp, str_stream + base, 1, &match, 0)) {
		if(match.rm_so == match.rm_eo) {
			/* Empty match: restarting at the same offset would loop forever
			   while the array grew without bound. */
			if(str_stream[base + match.rm_eo] == '\0') break;
			base += match.rm_eo + 1;
			continue;
		}

		grown = (regmatch_t *) realloc(match_array,
					       sizeof(regmatch_t) * ((size_t)*i + 1));
		if(!grown) {
			fprintf(stderr, "Realloc of match_array failed.\n");
			fprintf(stderr, "Error: %s\n", strerror(errno));
			free(match_array);
			exit(1);
		}
		match_array = grown;

		/* regexec reported offsets relative to the search start. */
		match.rm_so += base;
		match.rm_eo += base;
		match_array[(*i)++] = match;
		base = match.rm_eo;
	}

	return match_array;
}
546 Function name: cont_grep
547 args: exp Pointer to compiled POSIX style regular expression of search target.
548 nl_exp Pointer to compiled POSIX style regular expression of newlines. This
549 argument is NULL unless the -n option is used on the command line.
550 fd File descriptor of the jar file being grepped.
551 pbf Pointer to pushback file style file stream. This is for use with
552 the pushback.c file io functions.
553 options Bitwise flag containing flags set to represent the command line options.
554 purpose: This function handles single entries in an open jar file. The header is
555 read and then the embedded file is extracted and grepped.
556 returns: FALSE upon failure, TRUE otherwise.
559 static int cont_grep(regex_t
*exp
, regex_t
*nl_exp
, int fd
, pb_file
*pbf
, int options
) {
572 regmatch_t
*match_array
;
573 regmatch_t
*nl_offsets
=0;
575 if(pb_read(pbf
, (file_header
+ 4), 26) != 26) {
580 decd_siz(&csize
, &usize
, &fnlen
, &eflen
, &flags
, &method
, file_header
);
581 filename
= new_filename(pbf
, fnlen
);
582 lseek(fd
, eflen
, SEEK_CUR
);
583 if(filename
[fnlen
- 1] != '/') {
584 str_stream
= (method
== 8 || (flags
& 0x0008)) ?
585 (char *) inflate_string(pbf
, &csize
, &usize
) :
586 read_string(pbf
, csize
);
587 if(flags
& 0x008) check_crc(pbf
, str_stream
, usize
);
588 mk_ascii(str_stream
, usize
);
589 match_array
= fnd_match(exp
, str_stream
, &i
);
590 if((options
& JG_PRINT_LINE_NUMBER
) && i
)
591 nl_offsets
= fnd_match(nl_exp
, str_stream
, &j
);
592 prnt_mtchs(exp
, filename
, str_stream
, match_array
, nl_offsets
, i
, j
, options
);
593 if(match_array
) free(match_array
);
604 Function name: jargrep
605 args: exp Pointer to compiled POSIX style regular expression of search target.
606 nl_exp Pointer to compiled regular expression for newlines or NULL. Only set
607 if -n option is present at command line.
608 jarfile Filename of jar file to be searched.
609 options Bitwise flag containing flags set to represent the command line options.
610 purpose: Open jar file. Check signatures. When right signature is found go to deeper
614 static void jargrep(regex_t
*exp
, regex_t
*nl_exp
, const char *jarfile
, int options
){
620 if((fd
= open(jarfile
, O_RDONLY
)) == -1) {
621 if(!(options
& JG_SUPRESS_ERROR
))
622 fprintf(stderr
, "Error reading file '%s': %s\n", jarfile
, strerror(errno
));
628 if(pb_read(&pbf
, scratch
, 4) != 4) {
633 switch (check_sig(scratch
, &pbf
)) {
635 floop
= cont_grep(exp
, nl_exp
, fd
, &pbf
, options
);
641 /* fall through continue */
649 /* This is used to mark options with no short value. */
650 #define LONG_OPT(Num) ((Num) + 128)
652 #define OPT_HELP LONG_OPT (0)
654 static const struct option option_vec
[] =
656 { "help", no_argument
, NULL
, OPT_HELP
},
657 { "version", no_argument
, NULL
, 'V' },
658 { NULL
, no_argument
, NULL
, 0 }
663 args: argc number of in coming args.
664 argv array of strings.
665 purpose: Entry point of the program. Parse command line arguments and set options.
666 Set up regular expressions. Call grep routines for each file as input.
667 returns: 1 on error 0 on success.
670 int main(int argc
, char **argv
) {
676 regex_t
*nl_exp
= NULL
;
677 char *regexpstr
= NULL
;
679 while((c
= getopt_long(argc
, argv
, "bce:insVw",
680 option_vec
, NULL
)) != -1) {
683 options
|= JG_PRINT_BYTEOFFSET
;
686 options
|= JG_PRINT_COUNT
;
689 if(!(regexpstr
= (char *) malloc(strlen(optarg
) + 1))) {
690 fprintf(stderr
, "Malloc failure.\n");
691 fprintf(stderr
, "Error: %s\n", strerror(errno
));
694 strcpy(regexpstr
, optarg
);
697 options
|= JG_IGNORE_CASE
;
700 options
|= JG_PRINT_LINE_NUMBER
;
703 options
|= JG_SUPRESS_ERROR
;
706 options
|= JG_INVERT
;
712 options
|= JG_WORD_EXPRESSIONS
;
718 fprintf(stderr
, Usage
, argv
[0]);
723 if(((argc
- optind
) >= 2)) {
724 regexpstr
= argv
[optind
];
725 fileindex
= optind
+ 1;
728 fprintf(stderr
, "Invalid arguments.\n");
729 fprintf(stderr
, Usage
, argv
[0]);
733 else if((argc
- optind
) == 1) {
737 fprintf(stderr
, "Invalid arguments.\n");
738 fprintf(stderr
, Usage
, argv
[0]);
742 if(opt_valid(options
)) {
743 regexp
= create_regexp(regexpstr
, options
);
744 if(options
& JG_PRINT_LINE_NUMBER
) nl_exp
= create_regexp("\n", 0);
746 for(; fileindex
< argc
; fileindex
++)
747 jargrep(regexp
, nl_exp
, argv
[fileindex
], options
);
749 if(options
& JG_PRINT_LINE_NUMBER
) regfree(nl_exp
);
753 fprintf(stderr
, "Error: Invalid combination of options.\n");
759 void help(const char *filename
)
761 printf (Usage
, filename
);
764 Search files in a jar file for a pattern.\n\
766 -b print byte offset of match\n\
767 -c print number of matches\n\
768 -i compare case-insensitively\n\
769 -n print line number of each match\n\
770 -s suppress error messages\n\
771 -w force PATTERN to match only whole words\n\
772 -e PATTERN use PATTERN as regular exprssion\n\
780 printf("grepjar (%s) %s\n\n", PACKAGE
, VERSION
);
781 printf("Copyright 1999, 2000, 2001 Bryan Burns\n");
782 printf("Copyright 2000 Cory Hollingsworth\n");
783 printf("Copyright 2002 Free Software Foundation\n");
785 This is free software; see the source for copying conditions. There is NO\n\
786 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n");