2 jargrep.c - main functions for jargrep utility
3 Copyright (C) 1999, 2000 Bryan Burns
4 Copyright (C) 2000 Cory Hollingsworth
6 Parts of this program are based on Bryan Burns' work with fastjar
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License
11 as published by the Free Software Foundation; either version 2
12 of the License, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24 /* $Id: jargrep.c,v 1.4 2000/12/15 18:45:09 tromey Exp $
27 Revision 1.4 2000/12/15 18:45:09 tromey
28 * jargrep.c: Include getopt.h if it exists.
30 * configure, config.h: Rebuilt.
31 * configure.in: Check for getopt.h.
33 Revision 1.3 2000/12/14 18:45:35 ghazi
36 * compress.c: Include stdlib.h and compress.h.
38 (report_str_error): Make static.
39 (ez_inflate_str): Delete unused variable. Add parens in if-stmt.
40 (hrd_inflate_str): Likewise.
42 * compress.h (init_compression, end_compression, init_inflation,
43 end_inflation): Prototype void arguments.
45 * dostime.c (rcsid): Delete.
47 * jargrep.c: Include ctype.h, stdlib.h, zlib.h and compress.h.
48 Make functions static. Cast ctype function argument to `unsigned
49 char'. Add parens in if-stmts. Constify.
50 (Usage): Change into a macro.
51 (jargrep): Remove unused parameter.
53 * jartool.c: Constify. Add parens in if-stmts. Align
54 signed/unsigned char pointers in functions calls using casts.
56 (list_jar): Fix printf format specifier.
57 (usage): Chop long string into bits. Reformat.
59 * pushback.c (rcsid): Delete.
61 Revision 1.2 2000/12/11 02:59:55 apbianco
62 2000-12-10 Robert Lipe <robertlipe@usa.net>
64 * jargrep.c (jargrep): Added null statement after case.
66 2000-12-10 Alexandre Petit-Bianco <apbianco@cygnus.com>
69 * Makefile.in: Rebuilt with `-i' and `--enable-foreign'.
71 (http://gcc.gnu.org/ml/gcc/2000-12/msg00294.html)
73 Revision 1.1 2000/12/09 03:08:23 apbianco
74 2000-12-08 Alexandre Petit-Bianco <apbianco@cygnus.com>
78 Revision 1.8 2000/09/13 14:02:02 cory
79 Reformatted some of the code to more closely match the layout of the original
82 Revision 1.7 2000/09/12 22:29:36 cory
83 Jargrep now seems to do what I want it to do. Performs properly on Linux x86,
84 will test some other platforms later.
95 #include <sys/types.h>
104 #include "pushback.h"
107 #include "compress.h"
112 #define Usage "Usage: %s [-bcinsw] <-e regexp | regexp> file(s)\n"
118 Function name: opt_valid
119 arg: options Bitfield flag that contains the command line options of grepjar.
120 purpose: To guard against the occurrence of certain incompatible flags being used
122 returns: TRUE if options are valid, FALSE otherwise.
/* opt_valid: tests whether JG_PRINT_COUNT is set in `options` together with
   JG_PRINT_BYTEOFFSET and/or JG_PRINT_LINE_NUMBER.
   NOTE(review): the statements controlled by this test and the return
   statement are not visible in this excerpt -- confirm the TRUE/FALSE
   mapping against the complete file. */
125 static int opt_valid(int options
) {
128 if((options
& JG_PRINT_COUNT
) &&
129 (options
& (JG_PRINT_BYTEOFFSET
| JG_PRINT_LINE_NUMBER
)))
139 Function name: create_regexp
140 args: regstr String containing the uncompiled regular expression. This may be the
141 expression as is passed in through argv.
142 options This is the flag containing the commandline options that have been
144 purpose: Handle the exception handling involved with setting up a new regular
146 returns: Newly allocated compiled regular expression ready to be used in a regexec call.
/* create_regexp: allocates a regex_t with malloc and compiles `regstr` into
   it with regcomp.  REG_ICASE is passed when JG_IGNORE_CASE is set in
   `options`; otherwise the compile flags are 0.
   NOTE(review): several lines of this definition (declarations, the else
   branches and the return) are not visible in this excerpt. */
149 static regex_t
*create_regexp(const char *regstr
, int options
) {
155 if((exp
= (regex_t
*) malloc(sizeof(regex_t
))))
/* A non-zero regcomp result is an error code. */
157 if((errcode
= regcomp(exp
, regstr
, (options
& JG_IGNORE_CASE
) ? REG_ICASE
: 0))) {
158 fprintf(stderr
, "regcomp of regex failed,\n");
/* First regerror call (NULL buffer, size 0) only asks for the message size;
   one extra byte is added to that size for the allocation. */
159 if((errmsg
= (char *) malloc(msgsize
= regerror(errcode
, exp
, NULL
, 0) + 1))) {
/* Second regerror call fills the freshly allocated buffer. */
160 regerror(errcode
, exp
, errmsg
, msgsize
);
161 fprintf(stderr
, "Error: %s\n", errmsg
);
/* Reported when the error-message buffer itself could not be allocated. */
167 fprintf(stderr
, "Malloc of errmsg failed.\n");
168 fprintf(stderr
, "Error: %s\n", strerror(errno
));
/* Reported when the regex_t allocation failed. */
175 fprintf(stderr
, "Malloc of regex failed,\n");
176 fprintf(stderr
, "Error: %s\n", strerror(errno
));
184 Function name: check_sig
185 args: scratch Pointer to array of bytes containing signature.
186 pbf Pointer to push back handle for jar file.
187 purpose: Verify that checksum is correct.
188 returns: 0, 1, or 2. 0 means we are ready to read embedded file information. 1 means
189 we have read beyond the embedded file list and can exit knowing we have read all the
190 relevant information. 2 means we still haven't reached embedded file list and need to
191 do some more reading.
/* check_sig: unpacks a 4-byte signature from the start of `scratch` and
   compares it with three constants.
   NOTE(review): the return-value assignments are not visible in this
   excerpt; presumably they yield the 0/1/2 codes the callers switch on --
   confirm against the complete file. */
193 static int check_sig(ub1
*scratch
, pb_file
*pbfp
) {
197 signature
= UNPACK_UB4(scratch
, 0);
200 printf("signature is %x\n", signature
);
/* 0x08074b50: reported as a data descriptor; 12 more bytes are read from
   the pushback file into scratch. */
202 if(signature
== 0x08074b50){
204 printf("skipping data descriptor\n");
206 pb_read(pbfp
, scratch
, 12);
/* 0x02014b50: reported as the central header. */
208 } else if(signature
== 0x02014b50){
210 printf("Central header reached.. we're all done!\n");
/* Anything other than 0x04034b50 at this point is reported as unexpected. */
213 }else if(signature
!= 0x04034b50){
214 printf("Ick! %#x\n", signature
);
222 Function name: decd_siz
223 args csize Pointer to embedded file's compressed size.
224 usize Pointer to embedded file's uncompressed size.
225 fnlen Pointer to embedded file's file name length.
226 eflen Pointer to length of extra fields in jar file.
227 flags Pointer to bitmapped flags.
228 method Pointer to indicator of storage method of embedded file.
229 file_header Pointer to string containing the above values to be unpacked.
230 Purpose: Unpack the series of values from file_header.
/* decd_siz: unpacks six fields from `file_header` into the caller-supplied
   output pointers, using UNPACK_UB4 for the two sizes and UNPACK_UB2 for the
   remaining fields, and prints each value after storing it.
   NOTE(review): lines are missing between the statements in this excerpt;
   the printf calls may be conditionally compiled -- confirm. */
233 static void decd_siz(ub4
*csize
, ub4
*usize
, ub2
*fnlen
, ub2
*eflen
, ub2
*flags
, ub2
*method
, ub1
*file_header
) {
234 *csize
= UNPACK_UB4(file_header
, LOC_CSIZE
);
236 printf("Compressed size is %u\n", *csize
);
239 *usize
= UNPACK_UB4(file_header
, LOC_USIZE
);
241 printf("Uncompressed size is %u\n", *usize
);
244 *fnlen
= UNPACK_UB2(file_header
, LOC_FNLEN
);
246 printf("Filename length is %hu\n", *fnlen
);
249 *eflen
= UNPACK_UB2(file_header
, LOC_EFLEN
);
251 printf("Extra field length is %hu\n", *eflen
);
/* NOTE(review): the flags are read at the offset named LOC_EXTRA -- confirm
   that this constant denotes the flags field of the local header. */
254 *flags
= UNPACK_UB2(file_header
, LOC_EXTRA
);
256 printf("Flags are %#hx\n", *flags
);
259 *method
= UNPACK_UB2(file_header
, LOC_COMP
);
261 printf("Compression method is %#hx\n", *method
);
267 Function name: new_filename
268 args: pbf Pointer to push back file handle. Used for reading input file.
269 len Length of file name to be read.
270 purpose: Read in the embedded file name from jar file.
271 returns: Pointer to newly allocated string containing file name.
274 static char *new_filename(pb_file
*pbf
, ub4 len
) {
277 if(!(filename
= (char *) malloc(len
+ 1))) {
278 fprintf(stderr
, "Malloc failed of filename\n");
279 fprintf(stderr
, "Error: %s\n", strerror(errno
));
281 pb_read(pbf
, filename
, len
);
282 filename
[len
] = '\0';
285 printf("filename is %s\n", filename
);
292 Function name: read_string
293 args: pbf Pointer to push back file handle. Used for reading input file.
294 size Size of embedded file in bytes.
295 purpose: Create a string containing the contents of the embedded noncompressed file.
296 returns: Pointer to newly allocated string containing embedded file contents.
/* read_string: allocates size + 1 bytes and reads `size` bytes from the
   pushback file into the new buffer.
   NOTE(review): termination of the buffer, the else branch and the return
   statement are not visible in this excerpt. */
299 static char *read_string(pb_file
*pbf
, int size
) {
302 if((page
= (char *) malloc(size
+ 1))) {
303 pb_read(pbf
, page
, size
);
/* Reported when the page buffer could not be allocated. */
307 fprintf(stderr
, "Malloc of page buffer failed.\n");
308 fprintf(stderr
, "Error: %s\n", strerror(errno
));
316 Function name: extract_line
317 args: stream String containing the full contents of a file which is to be substringed
318 in order to provide line representing our grep output.
319 begin Index into stream which regular expression first matches.
320 end Index into stream which end of match to the regular expression.
321 b Pointer to the index of what will be the beginning of the line when
322 string is returned. Used for -b option.
323 purpose: Create a string that can be printed by jargrep from the long string stream.
324 The matching line that is printed out by jargrep is generated by this function.
325 returns: Pointer to newly allocated string containing matched expression.
/*
 * extract_line: copy out the line of `stream` that contains a match.
 *
 *   stream  NUL-terminated text being searched
 *   begin   offset in stream where the match starts
 *   end     offset in stream just past the match
 *   b       out: offset in stream of the first character of the line
 *
 * A line is bounded by control characters other than tab; the NUL
 * terminator is a control character and therefore ends the last line.
 * Returns a newly allocated, NUL-terminated copy of the line that the
 * caller must free.  Allocation failure is reported and terminates the
 * program.
 */
static char *extract_line(const char *stream, regoff_t begin, regoff_t end, int *b) {
	int e;
	int length;
	char *retstr;

	/* Walk back to the previous line boundary.  Tabs are kept as line
	   content here exactly as in the forward scan below, so a tab before
	   the match no longer truncates the line. */
	for(*b = begin;
	    *b >= 0 && (stream[*b] == '\t' || !iscntrl((unsigned char) stream[*b]));
	    (*b)--);
	(*b)++;

	/* Walk forward to the next line boundary. */
	for(e = end; stream[e] == '\t' || !iscntrl((unsigned char) stream[e]); e++);

	length = e - *b;
	if(length < 0) length = 0;	/* match sat on a boundary character */

	if(!(retstr = (char *) malloc((size_t) length + 1))) {
		fprintf(stderr, "Malloc failed of output string.\n");
		fprintf(stderr, "Error: %s\n", strerror(errno));
		exit(1);
	}

	/* The length is already known: copy it directly.  Nothing is formatted
	   into the buffer first, which could overrun it on short lines. */
	memcpy(retstr, &(stream[*b]), (size_t) length);
	retstr[length] = '\0';

	return retstr;
}
352 Function name: chk_wrd
353 args: exp Pointer to compiled POSIX style regular expression of search target.
354 str String known to contain at least one match of exp.
355 purpose: Verify that the occurrence of the regular expression in str occurs as a whole
356 word and not a substring of another word.
357 returns: TRUE if it is a word, FALSE if it is a substring.
/*
 * chk_wrd: decide whether `exp` matches somewhere in `str` as a whole word.
 *
 *   exp  compiled POSIX regular expression
 *   str  NUL-terminated line to examine
 *
 * A match counts as a word when the character before it (if any) and the
 * character after it (if any) are neither alphanumeric nor '_'.
 * Returns non-zero if such a match exists, 0 otherwise.
 */
static int chk_wrd(regex_t *exp, const char *str) {
	int wrd_fnd = 0;
	const char *cursor = str;
	regmatch_t match;

	while(!wrd_fnd && regexec(exp, cursor, 1, &match, 0) == 0) {
		const char *hit = cursor + match.rm_so;
		const char *after = cursor + match.rm_eo;
		int frnt_ok;
		int bck_ok = 0;

		/* Front edge: start of the line, or a non-word character just
		   before the match. */
		if(hit == str) frnt_ok = 1;
		else frnt_ok = !isalnum((unsigned char) hit[-1]) && hit[-1] != '_';

		/* Back edge only matters once the front edge is acceptable. */
		if(frnt_ok)
			bck_ok = (*after == '\0')
				|| (!isalnum((unsigned char) *after) && *after != '_');

		wrd_fnd = frnt_ok && bck_ok;

		if(match.rm_eo > 0) {
			cursor = after;
		}
		else {
			/* Zero-length match at the cursor: step one character so the
			   search always makes progress, and stop at end of string. */
			if(*cursor == '\0') break;
			cursor++;
		}
	}

	return wrd_fnd;
}
391 Function name: prnt_mtchs
392 args: exp Pointer to compiled POSIX style regular expression of search target.
393 filename String containing the name of the embedded file which matches have
395 stream String containing the processed contents of the embedded jar file
396 represented with filename.
397 pmatch Array of regmatch_t matches into stream.
398 nl_offset Array of offsets of '\n' characters in stream. May be NULL if -n is
399 not set on command line.
400 num Number of matches in pmatch array.
401 lines Number of lines in file. Not set if -n is not set on command line.
402 options Bitwise flag containing flags set to represent the command line
404 purpose: Control output of jargrep. Output is controlled by which options have been
405 set at the command line.
/* prnt_mtchs: walks the `num` entries of `pmatch`, turns each into its
   containing line with extract_line, and prints according to `options`.
   NOTE(review): declarations, initialisation of j / o_begin / ln_cnt, the
   printing of the line text and any release of `str` are not visible in
   this excerpt. */
408 static void prnt_mtchs(regex_t
*exp
, const char *filename
, const char *stream
, regmatch_t
*pmatch
, regmatch_t
*nl_offset
, int num
, int lines
, int options
) {
418 for(i
= 0; i
< num
; i
++) {
/* extract_line also reports the line's starting offset through &begin. */
419 str
= extract_line(stream
, pmatch
[i
].rm_so
, pmatch
[i
].rm_eo
, &begin
);
/* Only lines that start after o_begin are considered. */
420 if(begin
> o_begin
) {
/* With JG_WORD_EXPRESSIONS set, the line must also pass chk_wrd. */
421 if(!(options
& JG_WORD_EXPRESSIONS
) || chk_wrd(exp
, str
)) {
423 if(!(options
& JG_PRINT_COUNT
)) {
424 printf("%s:", filename
);
425 if(options
& JG_PRINT_LINE_NUMBER
) {
/* Advance j past every nl_offset entry that starts before this line; j is
   not reset inside the loop, and j + 1 is printed as the line number. */
426 for(; j
< lines
&& nl_offset
[j
].rm_so
< begin
; j
++);
427 printf("%d:", j
+ 1);
429 if(options
& JG_PRINT_BYTEOFFSET
) printf("%d:", begin
);
/* In count mode a single "filename:count" line is printed. */
437 if(options
& JG_PRINT_COUNT
) printf("%s:%d\n", filename
, ln_cnt
);
441 Function name: check_crc
442 args: pbf Pointer to pushback file pointer for jar file.
443 stream String containing the unmodified contents of the extracted file entry.
444 usize Size of file in bytes.
445 purpose: Verify the CRC matches that as what is stored in the jar file.
/* check_crc: computes crc32 over the first `usize` bytes of `stream`, reads
   16 bytes from the pushback file into scratch, checks that they begin with
   0x08074b50, and unpacks a stored CRC from offset 4 of those bytes.
   NOTE(review): declarations, the failure actions and the final comparison
   are not visible in this excerpt. */
448 static void check_crc(pb_file
*pbf
, const char *stream
, ub4 usize
) {
/* NOTE(review): crc is passed to this first crc32 call before any visible
   assignment -- confirm it is initialised where it is declared. */
453 crc
= crc32(crc
, NULL
, 0);
454 crc
= crc32(crc
, (const unsigned char *)stream
, usize
);
455 if(pb_read(pbf
, scratch
, 16) != 16) {
459 if(UNPACK_UB4(scratch
, 0) != 0x08074b50) {
460 fprintf(stderr
, "Error! Missing data descriptor!\n");
463 lcrc
= UNPACK_UB4(scratch
, 4);
465 fprintf(stderr
, "Error! CRCs do not match! Got %x, expected %x\n",
472 Function name: mk_ascii
473 args: stream String that contains the contents of the extracted file entry.
475 purpose: Make certain that the contents of the file are ASCII, not binary. This
476 permits grepping of binary files as well by converting non ASCII and control characters
/* mk_ascii: visits each of the first `usize` bytes of `stream` and tests
   whether the byte is a control character or has a value of 128 or above.
   NOTE(review): the start of the condition and the statement it guards are
   not visible in this excerpt -- confirm what matching bytes are replaced
   with. */
480 static void mk_ascii(char *stream
, size_t usize
) {
483 for(i
= 0; i
< usize
; i
++)
485 && (iscntrl((unsigned char)stream
[i
])
486 || (unsigned char) stream
[i
] >= 128))
491 Function name: fnd_match
492 args: exp Pointer to compiled POSIX style regular expression of search target.
493 str_stream String that contains the contents of the extracted file entry.
494 i Pointer to counter and index of matches.
495 purpose: Search str_stream for occurrences of the regular expression exp and create
497 returns: Pointer to newly allocated array of regmatch_t which gives indexes to start
498 and end of matches. NULL is returned upon no matches found.
/*
 * fnd_match: collect every non-overlapping match of `exp` in `str_stream`.
 *
 *   exp         compiled POSIX regular expression
 *   str_stream  NUL-terminated text to search
 *   i           out: number of matches stored in the returned array
 *
 * Returns a newly allocated array of *i regmatch_t entries whose rm_so and
 * rm_eo are offsets from the start of str_stream, or NULL when nothing
 * matched.  The caller frees the array.  Zero-length matches are skipped:
 * they carry no text to report, and restarting the search at the same
 * offset would never terminate.  Allocation failure is reported and
 * terminates the program.
 */
static regmatch_t *fnd_match(regex_t *exp, const char *str_stream, int *i) {
	regmatch_t match;
	regmatch_t *match_array = NULL;
	regmatch_t *tmp;
	regoff_t base = 0;	/* offset in str_stream where the next search starts */

	*i = 0;
	while(regexec(exp, &(str_stream[base]), 1, &match, 0) == 0) {
		if(match.rm_eo == match.rm_so) {
			/* Empty match: step over one character, or stop at the end. */
			if(str_stream[base + match.rm_eo] == '\0') break;
			base += match.rm_eo + 1;
			continue;
		}

		/* Grow through a temporary so the old array is not lost if
		   realloc fails. */
		if(!(tmp = (regmatch_t *)
		     realloc(match_array, sizeof(regmatch_t) * ((size_t) *i + 1)))) {
			fprintf(stderr, "Realloc of match_array failed.\n");
			fprintf(stderr, "Error: %s\n", strerror(errno));
			exit(1);
		}
		match_array = tmp;

		/* regexec reports offsets relative to the search start; store
		   them relative to the start of str_stream. */
		match.rm_so += base;
		match.rm_eo += base;
		match_array[*i] = match;
		(*i)++;

		base = match.rm_eo;
	}

	return match_array;
}
532 Function name: cont_grep
533 args: exp Pointer to compiled POSIX style regular expression of search target.
534 nl_exp Pointer to compiled POSIX style regular expression of newlines. This
535 argument is NULL unless the -n option is used on the command line.
536 fd File descriptor of the jar file being grepped.
537 pbf Pointer to pushback file style file stream. This is for use with
538 the pushback.c file io functions.
539 options Bitwise flag containing flags set to represent the command line options.
540 purpose: This function handles single entries in an open jar file. The header is
541 read and then the embedded file is extracted and grepped.
542 returns: FALSE upon failure, TRUE otherwise.
/* cont_grep: processes one jar entry.  It reads 26 header bytes, decodes
   the sizes/flags, reads the entry name, and -- unless the name ends in
   '/' -- loads the entry's contents, searches them and prints the matches.
   NOTE(review): declarations, the read-failure branch, clean-up of
   str_stream / filename and the return statement are not visible in this
   excerpt. */
545 static int cont_grep(regex_t
*exp
, regex_t
*nl_exp
, int fd
, pb_file
*pbf
, int options
) {
558 regmatch_t
*match_array
;
559 regmatch_t
*nl_offsets
=0;
/* The 26 bytes are stored starting at file_header + 4. */
561 if(pb_read(pbf
, (file_header
+ 4), 26) != 26) {
566 decd_siz(&csize
, &usize
, &fnlen
, &eflen
, &flags
, &method
, file_header
);
567 filename
= new_filename(pbf
, fnlen
);
/* Moves the file descriptor forward by eflen bytes. */
568 lseek(fd
, eflen
, SEEK_CUR
);
/* NOTE(review): when fnlen is 0 this indexes filename[-1] -- confirm that a
   zero-length name cannot reach this point. */
569 if(filename
[fnlen
- 1] != '/') {
/* inflate_string is used when method is 8 or flag bit 0x0008 is set;
   otherwise csize bytes are read as-is with read_string. */
570 str_stream
= (method
== 8 || (flags
& 0x0008)) ?
571 (char *) inflate_string(pbf
, &csize
, &usize
) :
572 read_string(pbf
, csize
);
573 if(flags
& 0x008) check_crc(pbf
, str_stream
, usize
);
574 mk_ascii(str_stream
, usize
);
575 match_array
= fnd_match(exp
, str_stream
, &i
);
/* Newline offsets are collected only when line numbers were requested and
   at least one match was found. */
576 if((options
& JG_PRINT_LINE_NUMBER
) && i
)
577 nl_offsets
= fnd_match(nl_exp
, str_stream
, &j
);
/* NOTE(review): j is passed below even when the fnd_match call above was
   skipped -- confirm it is initialised where it is declared. */
578 prnt_mtchs(exp
, filename
, str_stream
, match_array
, nl_offsets
, i
, j
, options
);
579 if(match_array
) free(match_array
);
590 Function name: jargrep
591 args: exp Pointer to compiled POSIX style regular expression of search target.
592 nl_exp Pointer to compiled regular expression for newlines or NULL. Only set
593 if -n option is present at command line.
594 jarfile Filename of jar file to be searched.
595 options Bitwise flag containing flags set to represent the command line options.
596 purpose: Open jar file. Check signatures. When right signature is found go to deeper
/* jargrep: opens `jarfile` read-only, then reads 4-byte signatures, passes
   each to check_sig and switches on the result; one of the switch arms
   hands the entry to cont_grep and keeps its result in floop.
   NOTE(review): declarations, pushback-file set-up, the loop construct, the
   case labels and any close of fd are not visible in this excerpt. */
600 static void jargrep(regex_t
*exp
, regex_t
*nl_exp
, const char *jarfile
, int options
){
606 if((fd
= open(jarfile
, O_RDONLY
)) == -1) {
/* The open failure is reported unless JG_SUPRESS_ERROR is set. */
607 if(!(options
& JG_SUPRESS_ERROR
))
608 fprintf(stderr
, "Error reading file '%s': %s\n", jarfile
, strerror(errno
));
/* A short read of the 4 signature bytes takes the branch below. */
614 if(pb_read(&pbf
, scratch
, 4) != 4) {
619 switch (check_sig(scratch
, &pbf
)) {
621 floop
= cont_grep(exp
, nl_exp
, fd
, &pbf
, options
);
627 /* fall through continue */
637 args: argc number of in coming args.
638 argv array of strings.
639 purpose: Entry point of the program. Parse command line arguments and set options.
640 Set up regular expressions. Call grep routines for each file as input.
641 returns: 1 on error 0 on success.
/* main: parses the command line with getopt ("bce:insVw"), builds the
   option bit mask, determines the regular expression and the index of the
   first file argument, then -- if opt_valid accepts the mask -- compiles
   the expression(s) and calls jargrep for every remaining argument.
   NOTE(review): the switch/case labels, several else branches, the exit
   calls and the return statement are not visible in this excerpt. */
644 int main(int argc
, char **argv
) {
650 regex_t
*nl_exp
= NULL
;
651 char *regexpstr
= NULL
;
653 while((c
= getopt(argc
, argv
, "bce:insVw")) != -1) {
656 options
|= JG_PRINT_BYTEOFFSET
;
659 options
|= JG_PRINT_COUNT
;
/* The option argument is copied into a freshly allocated regexpstr. */
662 if(!(regexpstr
= (char *) malloc(strlen(optarg
) + 1))) {
663 fprintf(stderr
, "Malloc failure.\n");
664 fprintf(stderr
, "Error: %s\n", strerror(errno
));
667 strcpy(regexpstr
, optarg
);
670 options
|= JG_IGNORE_CASE
;
673 options
|= JG_PRINT_LINE_NUMBER
;
676 options
|= JG_SUPRESS_ERROR
;
/* NOTE(review): JG_INVERT is set here, but the getopt string above contains
   no 'v' -- confirm whether this arm is reachable. */
679 options
|= JG_INVERT
;
682 printf("%s\n", GVERSION
);
685 options
|= JG_WORD_EXPRESSIONS
;
688 fprintf(stderr
, "Unknown option -%c\n", c
);
689 fprintf(stderr
, Usage
, argv
[0]);
/* With at least two non-option arguments left, the first is taken as the
   expression and the files start at the one after it. */
694 if(((argc
- optind
) >= 2)) {
695 regexpstr
= argv
[optind
];
696 fileindex
= optind
+ 1;
699 fprintf(stderr
, "Invalid arguments.\n");
700 fprintf(stderr
, Usage
, argv
[0]);
704 else if((argc
- optind
) == 1) {
708 fprintf(stderr
, "Invalid arguments.\n");
709 fprintf(stderr
, Usage
, argv
[0]);
713 if(opt_valid(options
)) {
714 regexp
= create_regexp(regexpstr
, options
);
/* A second expression matching "\n" is compiled only for line numbering. */
715 if(options
& JG_PRINT_LINE_NUMBER
) nl_exp
= create_regexp("\n", 0);
717 for(; fileindex
< argc
; fileindex
++)
718 jargrep(regexp
, nl_exp
, argv
[fileindex
], options
);
720 if(options
& JG_PRINT_LINE_NUMBER
) regfree(nl_exp
);
724 fprintf(stderr
, "Error: Invalid combination of options.\n");