2 jargrep.c - main functions for jargrep utility
3 Copyright (C) 1999, 2000 Bryan Burns
4 Copyright (C) 2000 Cory Hollingsworth
6 Parts of this program are based on Bryan Burns' work with fastjar
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License
11 as published by the Free Software Foundation; either version 2
12 of the License, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24 /* $Id: jargrep.c,v 1.3 2000/12/14 18:45:35 ghazi Exp $
27 Revision 1.3 2000/12/14 18:45:35 ghazi
30 * compress.c: Include stdlib.h and compress.h.
32 (report_str_error): Make static.
33 (ez_inflate_str): Delete unused variable. Add parens in if-stmt.
34 (hrd_inflate_str): Likewise.
36 * compress.h (init_compression, end_compression, init_inflation,
37 end_inflation): Prototype void arguments.
39 * dostime.c (rcsid): Delete.
41 * jargrep.c: Include ctype.h, stdlib.h, zlib.h and compress.h.
42 Make functions static. Cast ctype function argument to `unsigned
43 char'. Add parens in if-stmts. Constify.
44 (Usage): Change into a macro.
45 (jargrep): Remove unused parameter.
47 * jartool.c: Constify. Add parens in if-stmts. Align
48 signed/unsigned char pointers in functions calls using casts.
50 (list_jar): Fix printf format specifier.
51 (usage): Chop long string into bits. Reformat.
53 * pushback.c (rcsid): Delete.
55 Revision 1.2 2000/12/11 02:59:55 apbianco
56 2000-12-10 Robert Lipe <robertlipe@usa.net>
58 * jargrep.c (jargrep): Added null statement after case.
60 2000-12-10 Alexandre Petit-Bianco <apbianco@cygnus.com>
63 * Makefile.in: Rebuilt with `-i' and `--enable-foreign'.
65 (http://gcc.gnu.org/ml/gcc/2000-12/msg00294.html)
67 Revision 1.1 2000/12/09 03:08:23 apbianco
68 2000-12-08 Alexandre Petit-Bianco <apbianco@cygnus.com>
72 Revision 1.8 2000/09/13 14:02:02 cory
73 Reformatted some of the code to more closely match the layout of the original
76 Revision 1.7 2000/09/12 22:29:36 cory
77 Jargrep now seems to do what I want it to do. Performs properly on Linux x86,
78 will test some other platforms later.
89 #include <sys/types.h>
101 #include "compress.h"
106 #define Usage "Usage: %s [-bcinsw] <-e regexp | regexp> file(s)\n"
112 Function name: opt_valid
113 arg: options Bitfield flag that contains the command line options of grepjar.
114 purpose: To guard against the occurrence of certain incompatible flags being used
116 returns: TRUE if options are valid, FALSE otherwise.
119 static int opt_valid(int options
) {
122 if((options
& JG_PRINT_COUNT
) &&
123 (options
& (JG_PRINT_BYTEOFFSET
| JG_PRINT_LINE_NUMBER
)))
133 Function name: create_regexp
134 args: regstr String containing the uncompiled regular expression. This may be the
135 expression as is passed in through argv.
136 options This is the flag containing the commandline options that have been
138 purpose: Handle the exception handling involved with setting up a new regular
140 returns: Newly allocated compiled regular expression ready to be used in a regexec call.
143 static regex_t
*create_regexp(const char *regstr
, int options
) {
149 if((exp
= (regex_t
*) malloc(sizeof(regex_t
))))
151 if((errcode
= regcomp(exp
, regstr
, (options
& JG_IGNORE_CASE
) ? REG_ICASE
: 0))) {
152 fprintf(stderr
, "regcomp of regex failed,\n");
153 if((errmsg
= (char *) malloc(msgsize
= regerror(errcode
, exp
, NULL
, 0) + 1))) {
154 regerror(errcode
, exp
, errmsg
, msgsize
);
155 fprintf(stderr
, "Error: %s\n", errmsg
);
161 fprintf(stderr
, "Malloc of errmsg failed.\n");
162 fprintf(stderr
, "Error: %s\n", strerror(errno
));
169 fprintf(stderr
, "Malloc of regex failed,\n");
170 fprintf(stderr
, "Error: %s\n", strerror(errno
));
178 Function name: check_sig
179 args: scratch Pointer to array of bytes containing signature.
180 pbfp Pointer to push back handle for jar file.
181 purpose: Verify that checksum is correct.
182 returns: 0, 1, or 2. 0 means we are ready to read embedded file information. 1 means
183 we have read beyond the embedded file list and can exit knowing we have read all the
184 relevant information. 2 means we still haven't reached the embedded file list and need to
185 do some more reading.
187 static int check_sig(ub1
*scratch
, pb_file
*pbfp
) {
191 signature
= UNPACK_UB4(scratch
, 0);
194 printf("signature is %x\n", signature
);
196 if(signature
== 0x08074b50){
198 printf("skipping data descriptor\n");
200 pb_read(pbfp
, scratch
, 12);
202 } else if(signature
== 0x02014b50){
204 printf("Central header reached.. we're all done!\n");
207 }else if(signature
!= 0x04034b50){
208 printf("Ick! %#x\n", signature
);
216 Function name: decd_siz
217 args csize Pointer to embedded file's compressed size.
218 usize Pointer to embedded file's uncompressed size.
219 fnlen Pointer to embedded file's file name length.
220 eflen Pointer to length of extra fields in jar file.
221 flags Pointer to bitmapped flags.
222 method Pointer to indicator of storage method of embedded file.
223 file_header Pointer to string containing the above values to be unpacked.
224 Purpose: Unpack the series of values from file_header.
227 static void decd_siz(ub4
*csize
, ub4
*usize
, ub2
*fnlen
, ub2
*eflen
, ub2
*flags
, ub2
*method
, ub1
*file_header
) {
228 *csize
= UNPACK_UB4(file_header
, LOC_CSIZE
);
230 printf("Compressed size is %u\n", *csize
);
233 *usize
= UNPACK_UB4(file_header
, LOC_USIZE
);
235 printf("Uncompressed size is %u\n", *usize
);
238 *fnlen
= UNPACK_UB2(file_header
, LOC_FNLEN
);
240 printf("Filename length is %hu\n", *fnlen
);
243 *eflen
= UNPACK_UB2(file_header
, LOC_EFLEN
);
245 printf("Extra field length is %hu\n", *eflen
);
248 *flags
= UNPACK_UB2(file_header
, LOC_EXTRA
);
250 printf("Flags are %#hx\n", *flags
);
253 *method
= UNPACK_UB2(file_header
, LOC_COMP
);
255 printf("Compression method is %#hx\n", *method
);
261 Function name: new_filename
262 args: pbf Pointer to push back file handle. Used for reading input file.
263 len Length of file name to be read.
264 purpose: Read in the embedded file name from jar file.
265 returns: Pointer to newly allocated string containing file name.
268 static char *new_filename(pb_file
*pbf
, ub4 len
) {
271 if(!(filename
= (char *) malloc(len
+ 1))) {
272 fprintf(stderr
, "Malloc failed of filename\n");
273 fprintf(stderr
, "Error: %s\n", strerror(errno
));
275 pb_read(pbf
, filename
, len
);
276 filename
[len
] = '\0';
279 printf("filename is %s\n", filename
);
286 Function name: read_string
287 args: pbf Pointer to push back file handle. Used for reading input file.
288 size Size of embedded file in bytes.
289 purpose: Create a string containing the contents of the embedded noncompressed file.
290 returns: Pointer to newly allocated string containing embedded file contents.
293 static char *read_string(pb_file
*pbf
, int size
) {
296 if((page
= (char *) malloc(size
+ 1))) {
297 pb_read(pbf
, page
, size
);
301 fprintf(stderr
, "Malloc of page buffer failed.\n");
302 fprintf(stderr
, "Error: %s\n", strerror(errno
));
310 Function name: extract_line
311 args: stream String containing the full contents of a file which is to be substringed
312 in order to provide line representing our grep output.
313 begin Index into stream which regular expression first matches.
314 end Index into stream of the end of the match to the regular expression.
315 b Pointer to the index of what will be the beginning of the line when
316 string is returned. Used for -b option.
317 purpose: Create a string that can be printed by jargrep from the long string stream.
318 The matching line that is printed out by jargrep is generated by this function.
319 returns: Pointer to newly allocated string containing matched expression.
322 static char *extract_line(const char *stream
, regoff_t begin
, regoff_t end
, int *b
) {
327 for(*b
= begin
; *b
>= 0 && !iscntrl((unsigned char)stream
[*b
]); (*b
)--);
329 for(e
= end
; stream
[e
] == '\t' || !iscntrl((unsigned char)stream
[e
]); e
++);
331 if((retstr
= (char *) malloc(length
+ 1))) {
332 sprintf(retstr
, "%d:", *b
);
333 strncpy(retstr
, &(stream
[*b
]), length
);
334 retstr
[length
] = '\0';
337 fprintf(stderr
, "Malloc failed of output string.\n");
338 fprintf(stderr
, "Error: %s\n", strerror(errno
));
346 Function name: chk_wrd
347 args: exp Pointer to compiled POSIX style regular expression of search target.
348 str String known to contain at least one match of exp.
349 purpose: Verify that the occurrence of the regular expression in str occurs as a whole
350 word and not a substring of another word.
351 returns: TRUE if it is a word, FALSE if it is a substring.
354 static int chk_wrd(regex_t
*exp
, const char *str
) {
363 frnt_ok
= bck_ok
= FALSE
;
364 while(!wrd_fnd
&& !(regflag
= regexec(exp
, str2
, 1, &match
, 0))) {
365 if(!match
.rm_so
&& (str2
== str
)) frnt_ok
= TRUE
;
366 else if(!isalnum((unsigned char)str2
[match
.rm_so
- 1])
367 && str2
[match
.rm_so
- 1] != '_')
369 else frnt_ok
= FALSE
;
371 if(str2
[match
.rm_eo
] == '\0') bck_ok
= TRUE
;
372 else if(!isalnum((unsigned char)str2
[match
.rm_eo
])
373 && str2
[match
.rm_eo
] != '_')
377 wrd_fnd
= frnt_ok
&& bck_ok
;
378 str2
= &(str2
[match
.rm_eo
]);
385 Function name: prnt_mtchs
386 args: exp Pointer to compiled POSIX style regular expression of search target.
387 filename String containing the name of the embedded file which matches have
389 stream String containing the processed contents of the embedded jar file
390 represented by filename.
391 pmatch Array of regmatch_t matches into stream.
392 nl_offset Array of offsets of '\n' characters in stream. May be NULL if -n is
393 not set on command line.
394 num Number of matches in pmatch array.
395 lines Number of lines in file. Not set if -n is not set on command line.
396 options Bitwise flag containing flags set to represent the command line
398 purpose: Control output of jargrep. Output is controlled by which options have been
399 set at the command line.
402 static void prnt_mtchs(regex_t
*exp
, const char *filename
, const char *stream
, regmatch_t
*pmatch
, regmatch_t
*nl_offset
, int num
, int lines
, int options
) {
412 for(i
= 0; i
< num
; i
++) {
413 str
= extract_line(stream
, pmatch
[i
].rm_so
, pmatch
[i
].rm_eo
, &begin
);
414 if(begin
> o_begin
) {
415 if(!(options
& JG_WORD_EXPRESSIONS
) || chk_wrd(exp
, str
)) {
417 if(!(options
& JG_PRINT_COUNT
)) {
418 printf("%s:", filename
);
419 if(options
& JG_PRINT_LINE_NUMBER
) {
420 for(; j
< lines
&& nl_offset
[j
].rm_so
< begin
; j
++);
421 printf("%d:", j
+ 1);
423 if(options
& JG_PRINT_BYTEOFFSET
) printf("%d:", begin
);
431 if(options
& JG_PRINT_COUNT
) printf("%s:%d\n", filename
, ln_cnt
);
435 Function name: check_crc
436 args: pbf Pointer to pushback file pointer for jar file.
437 stream String containing the unmodified contents of the extracted file entry.
438 usize Size of file in bytes.
439 purpose: Verify that the CRC matches what is stored in the jar file.
442 static void check_crc(pb_file
*pbf
, const char *stream
, ub4 usize
) {
447 crc
= crc32(crc
, NULL
, 0);
448 crc
= crc32(crc
, (const unsigned char *)stream
, usize
);
449 if(pb_read(pbf
, scratch
, 16) != 16) {
453 if(UNPACK_UB4(scratch
, 0) != 0x08074b50) {
454 fprintf(stderr
, "Error! Missing data descriptor!\n");
457 lcrc
= UNPACK_UB4(scratch
, 4);
459 fprintf(stderr
, "Error! CRCs do not match! Got %x, expected %x\n",
466 Function name: mk_ascii
467 args: stream String that contains the contents of the extracted file entry.
469 purpose: Make certain that the contents of the file are ASCII, not binary. This
470 permits grepping of binary files as well by converting non ASCII and control characters
474 static void mk_ascii(char *stream
, size_t usize
) {
477 for(i
= 0; i
< usize
; i
++)
479 && (iscntrl((unsigned char)stream
[i
])
480 || (unsigned char) stream
[i
] >= 128))
485 Function name: fnd_match
486 args: exp Pointer to compiled POSIX style regular expression of search target.
487 str_stream String that contains the contents of the extracted file entry.
488 i Pointer to counter and index of matches.
489 purpose: Search str_stream for occurrences of the regular expression exp and create
491 returns: Pointer to newly allocated array of regmatch_t which gives indexes to start
492 and end of matches. NULL is returned upon no matches found.
495 static regmatch_t
*fnd_match(regex_t
*exp
, const char *str_stream
, int *i
) {
498 regmatch_t
*match_array
;
502 for(*i
= 0, regflag
= regexec(exp
, str_stream
, 1, &match
, 0); !regflag
;
503 regflag
= regexec(exp
, &(str_stream
[match
.rm_eo
]), 1, &match
, 0), (*i
)++)
505 if((tmp
= (regmatch_t
*)
506 realloc(match_array
, sizeof(regmatch_t
) * ((*i
) + 1))))
510 match
.rm_so
+= match_array
[(*i
) - 1].rm_eo
;
511 match
.rm_eo
+= match_array
[(*i
) - 1].rm_eo
;
513 match_array
[*i
] = match
;
516 fprintf(stderr
, "Realloc of match_array failed.\n");
517 fprintf(stderr
, "Error: %s\n", strerror(errno
));
526 Function name: cont_grep
527 args: exp Pointer to compiled POSIX style regular expression of search target.
528 nl_exp Pointer to compiled POSIX style regular expression of newlines. This
529 argument is NULL unless the -n option is used on the command line.
530 fd File descriptor of the jar file being grepped.
531 pbf Pointer to pushback file style file stream. This is for use with
532 the pushback.c file I/O functions.
533 options Bitwise flag containing flags set to represent the command line options.
534 purpose: This function handles single entries in an open jar file. The header is
535 read and then the embedded file is extracted and grepped.
536 returns: FALSE upon failure, TRUE otherwise.
539 static int cont_grep(regex_t
*exp
, regex_t
*nl_exp
, int fd
, pb_file
*pbf
, int options
) {
552 regmatch_t
*match_array
;
553 regmatch_t
*nl_offsets
;
555 if(pb_read(pbf
, (file_header
+ 4), 26) != 26) {
560 decd_siz(&csize
, &usize
, &fnlen
, &eflen
, &flags
, &method
, file_header
);
561 filename
= new_filename(pbf
, fnlen
);
562 lseek(fd
, eflen
, SEEK_CUR
);
563 if(filename
[fnlen
- 1] != '/') {
564 str_stream
= (method
== 8 || (flags
& 0x0008)) ?
565 (char *) inflate_string(pbf
, &csize
, &usize
) :
566 read_string(pbf
, csize
);
567 if(flags
& 0x008) check_crc(pbf
, str_stream
, usize
);
568 mk_ascii(str_stream
, usize
);
569 match_array
= fnd_match(exp
, str_stream
, &i
);
570 if((options
& JG_PRINT_LINE_NUMBER
) && i
)
571 nl_offsets
= fnd_match(nl_exp
, str_stream
, &j
);
572 prnt_mtchs(exp
, filename
, str_stream
, match_array
, nl_offsets
, i
, j
, options
);
573 if(match_array
) free(match_array
);
584 Function name: jargrep
585 args: exp Pointer to compiled POSIX style regular expression of search target.
586 nl_exp Pointer to compiled regular expression for newlines or NULL. Only set
587 if -n option is present at command line.
588 jarfile Filename of jar file to be searched.
589 options Bitwise flag containing flags set to represent the command line options.
590 purpose: Open jar file. Check signatures. When right signature is found go to deeper
594 static void jargrep(regex_t
*exp
, regex_t
*nl_exp
, const char *jarfile
, int options
){
600 if((fd
= open(jarfile
, O_RDONLY
)) == -1) {
601 if(!(options
& JG_SUPRESS_ERROR
))
602 fprintf(stderr
, "Error reading file '%s': %s\n", jarfile
, strerror(errno
));
608 if(pb_read(&pbf
, scratch
, 4) != 4) {
613 switch (check_sig(scratch
, &pbf
)) {
615 floop
= cont_grep(exp
, nl_exp
, fd
, &pbf
, options
);
621 /* fall through continue */
631 args: argc number of incoming args.
632 argv array of strings.
633 purpose: Entry point of the program. Parse command line arguments and set options.
634 Set up regular expressions. Call grep routines for each file as input.
635 returns: 1 on error 0 on success.
638 int main(int argc
, char **argv
) {
644 regex_t
*nl_exp
= NULL
;
645 char *regexpstr
= NULL
;
647 while((c
= getopt(argc
, argv
, "bce:insVw")) != -1) {
650 options
|= JG_PRINT_BYTEOFFSET
;
653 options
|= JG_PRINT_COUNT
;
656 if(!(regexpstr
= (char *) malloc(strlen(optarg
) + 1))) {
657 fprintf(stderr
, "Malloc failure.\n");
658 fprintf(stderr
, "Error: %s\n", strerror(errno
));
661 strcpy(regexpstr
, optarg
);
664 options
|= JG_IGNORE_CASE
;
667 options
|= JG_PRINT_LINE_NUMBER
;
670 options
|= JG_SUPRESS_ERROR
;
673 options
|= JG_INVERT
;
676 printf("%s\n", GVERSION
);
679 options
|= JG_WORD_EXPRESSIONS
;
682 fprintf(stderr
, "Unknown option -%c\n", c
);
683 fprintf(stderr
, Usage
, argv
[0]);
688 if(((argc
- optind
) >= 2)) {
689 regexpstr
= argv
[optind
];
690 fileindex
= optind
+ 1;
693 fprintf(stderr
, "Invalid arguments.\n");
694 fprintf(stderr
, Usage
, argv
[0]);
698 else if((argc
- optind
) == 1) {
702 fprintf(stderr
, "Invalid arguments.\n");
703 fprintf(stderr
, Usage
, argv
[0]);
707 if(opt_valid(options
)) {
708 regexp
= create_regexp(regexpstr
, options
);
709 if(options
& JG_PRINT_LINE_NUMBER
) nl_exp
= create_regexp("\n", 0);
711 for(; fileindex
< argc
; fileindex
++)
712 jargrep(regexp
, nl_exp
, argv
[fileindex
], options
);
714 if(options
& JG_PRINT_LINE_NUMBER
) regfree(nl_exp
);
718 fprintf(stderr
, "Error: Invalid combination of options.\n");