Update version
[official-gcc.git] / fastjar / jargrep.c
blobef3baf0010d37c3322a160b794a19894c6dcb711
1 /*
2 jargrep.c - main functions for jargrep utility
3 Copyright (C) 1999, 2000 Bryan Burns
4 Copyright (C) 2000 Cory Hollingsworth
6 Parts of this program are based on Bryan Burns' work with fastjar
7 Copyright (C) 1999.
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License
11 as published by the Free Software Foundation; either version 2
12 of the License, or (at your option) any later version.
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
24 /* $Id: jargrep.c,v 1.3 2000/12/14 18:45:35 ghazi Exp $
26 $Log: jargrep.c,v $
27 Revision 1.3 2000/12/14 18:45:35 ghazi
28 Warning fixes:
30 * compress.c: Include stdlib.h and compress.h.
31 (rcsid): Delete.
32 (report_str_error): Make static.
33 (ez_inflate_str): Delete unused variable. Add parens in if-stmt.
34 (hrd_inflate_str): Likewise.
36 * compress.h (init_compression, end_compression, init_inflation,
37 end_inflation): Prototype void arguments.
39 * dostime.c (rcsid): Delete.
41 * jargrep.c: Include ctype.h, stdlib.h, zlib.h and compress.h.
42 Make functions static. Cast ctype function argument to `unsigned
43 char'. Add parens in if-stmts. Constify.
44 (Usage): Change into a macro.
45 (jargrep): Remove unused parameter.
47 * jartool.c: Constify. Add parens in if-stmts. Align
48 signed/unsigned char pointers in functions calls using casts.
49 (rcsid): Delete.
50 (list_jar): Fix printf format specifier.
51 (usage): Chop long string into bits. Reformat.
53 * pushback.c (rcsid): Delete.
55 Revision 1.2 2000/12/11 02:59:55 apbianco
56 2000-12-10 Robert Lipe <robertlipe@usa.net>
58 * jargrep.c (jargrep): Added null statement after case.
60 2000-12-10 Alexandre Petit-Bianco <apbianco@cygnus.com>
62 * Makefile: Removed.
63 * Makefile.in: Rebuilt with `-i' and `--enable-foreign'.
65 (http://gcc.gnu.org/ml/gcc/2000-12/msg00294.html)
67 Revision 1.1 2000/12/09 03:08:23 apbianco
68 2000-12-08 Alexandre Petit-Bianco <apbianco@cygnus.com>
70 * fastjar: Imported.
72 Revision 1.8 2000/09/13 14:02:02 cory
73 Reformatted some of the code to more closely match the layout of the original
74 fastjar utility.
76 Revision 1.7 2000/09/12 22:29:36 cory
77 Jargrep now seems to do what I want it to do. Performs properly on Linux x86,
78 will test some other platforms later.
83 #include "config.h"
84 #include <stdio.h>
85 #include <unistd.h>
86 #include <regex.h>
87 #include <errno.h>
88 #include <string.h>
89 #include <sys/types.h>
90 #include <sys/stat.h>
91 #include <fcntl.h>
92 #include <ctype.h>
93 #ifdef STDC_HEADERS
94 #include <stdlib.h>
95 #endif
96 #include "jargrep.h"
97 #include "jartool.h"
98 #include "pushback.h"
99 #include "zipfile.h"
100 #include "zlib.h"
101 #include "compress.h"
102 #ifdef HAVE_GETOPT_H
103 #include <getopt.h>
104 #endif
/* Usage message; the single %s is filled with the program name (argv[0]). */
#define Usage "Usage: %s [-bcinsw] <-e regexp | regexp> file(s)\n"

/* getopt() state consulted by main() while parsing the command line. */
extern char *optarg;
extern int optind;
112 Function name: opt_valid
113 arg: options Bitfield flag that contains the command line options of grepjar.
114 purpose: To guard agains the occurance of certain incompatible flags being used
115 together.
116 returns: TRUE if options are valid, FALSE otherwise.
119 static int opt_valid(int options) {
120 int retflag;
122 if((options & JG_PRINT_COUNT) &&
123 (options & (JG_PRINT_BYTEOFFSET | JG_PRINT_LINE_NUMBER)))
125 retflag = FALSE;
127 else retflag = TRUE;
129 return retflag;
133 Function name: create_regexp
134 args: regstr String containing the uncompiled regular expression. This may be the
135 expression as is passed in through argv.
136 options This is the flag containing the commandline options that have been
137 parsed by getopt.
138 purpose: Handle the exception handling involved with setting upt a new regular
139 expression.
140 returns: Newly allocated compile regular expression ready to be used in an regexec call.
143 static regex_t *create_regexp(const char *regstr, int options) {
144 regex_t *exp;
145 int errcode;
146 int msgsize;
147 char *errmsg;
149 if((exp = (regex_t *) malloc(sizeof(regex_t))))
151 if((errcode = regcomp(exp, regstr, (options & JG_IGNORE_CASE) ? REG_ICASE : 0))) {
152 fprintf(stderr, "regcomp of regex failed,\n");
153 if((errmsg = (char *) malloc(msgsize = regerror(errcode, exp, NULL, 0) + 1))) {
154 regerror(errcode, exp, errmsg, msgsize);
155 fprintf(stderr, "Error: %s\n", errmsg);
156 free(exp);
157 free(errmsg);
158 exit(1);
160 else {
161 fprintf(stderr, "Malloc of errmsg failed.\n");
162 fprintf(stderr, "Error: %s\n", strerror(errno));
163 free(exp);
164 exit(1);
168 else {
169 fprintf(stderr, "Malloc of regex failed,\n");
170 fprintf(stderr, "Error: %s\n", strerror(errno));
171 exit(1);
174 return exp;
178 Function name: check_sig
179 args: scratch Pointer to array of bytes containing signature.
180 pbf Pointer to push back handle for jar file.
181 purpose: Verify that checksum is correct.
182 returns: 0, 1, or 2. 0 means we are ready to read embedded file information. 1 means
183 we have read beyound the embedded file list and can exit knowing we have read all the
184 relevent information. 2 means we still haven't reached embdedded file list and need to
185 do some more reading.
187 static int check_sig(ub1 *scratch, pb_file *pbfp) {
188 ub4 signature;
189 int retflag = 0;
191 signature = UNPACK_UB4(scratch, 0);
193 #ifdef DEBUG
194 printf("signature is %x\n", signature);
195 #endif
196 if(signature == 0x08074b50){
197 #ifdef DEBUG
198 printf("skipping data descriptor\n");
199 #endif
200 pb_read(pbfp, scratch, 12);
201 retflag = 2;
202 } else if(signature == 0x02014b50){
203 #ifdef DEBUG
204 printf("Central header reached.. we're all done!\n");
205 #endif
206 retflag = 1;
207 }else if(signature != 0x04034b50){
208 printf("Ick! %#x\n", signature);
209 retflag = 1;
212 return retflag;
216 Function name: decd_siz
217 args csize Pointer to embedded file's compressed size.
218 usize Pointer to embedded file's uncmpressed size.
219 fnlen Pointer to embedded file's file name length.
220 elfen Pointer to length of extra fields in jar file.
221 flags Pointer to bitmapped flags.
222 method Pointer to indicator of storage method of embedded file.
223 file_header Pointer to string containing the above values to be unbacked.
224 Purpose: Unpack the series of values from file_header.
227 static void decd_siz(ub4 *csize, ub4 *usize, ub2 *fnlen, ub2 *eflen, ub2 *flags, ub2 *method, ub1 *file_header) {
228 *csize = UNPACK_UB4(file_header, LOC_CSIZE);
229 #ifdef DEBUG
230 printf("Compressed size is %u\n", *csize);
231 #endif
233 *usize = UNPACK_UB4(file_header, LOC_USIZE);
234 #ifdef DEBUG
235 printf("Uncompressed size is %u\n", *usize);
236 #endif
238 *fnlen = UNPACK_UB2(file_header, LOC_FNLEN);
239 #ifdef DEBUG
240 printf("Filename length is %hu\n", *fnlen);
241 #endif
243 *eflen = UNPACK_UB2(file_header, LOC_EFLEN);
244 #ifdef DEBUG
245 printf("Extra field length is %hu\n", *eflen);
246 #endif
248 *flags = UNPACK_UB2(file_header, LOC_EXTRA);
249 #ifdef DEBUG
250 printf("Flags are %#hx\n", *flags);
251 #endif
253 *method = UNPACK_UB2(file_header, LOC_COMP);
254 #ifdef DEBUG
255 printf("Compression method is %#hx\n", *method);
256 #endif
261 Function name: new_filename
262 args: pbf Pointer to push back file handle. Used for reading input file.
263 len Length of file name to be read.
264 purpose: Read in the embedded file name from jar file.
265 returns: Pointer to newly allocated string containing file name.
268 static char *new_filename(pb_file *pbf, ub4 len) {
269 char *filename;
271 if(!(filename = (char *) malloc(len + 1))) {
272 fprintf(stderr, "Malloc failed of filename\n");
273 fprintf(stderr, "Error: %s\n", strerror(errno));
275 pb_read(pbf, filename, len);
276 filename[len] = '\0';
278 #ifdef DEBUG
279 printf("filename is %s\n", filename);
280 #endif
282 return filename;
286 Funtion name: read_string
287 args: pbf Pointer to push back file handle. Used for reading input file.
288 size Size of embedded file in bytes.
289 purpose: Create a string containing the contents of the embedded noncompressed file.
290 returns: Pointer to newly allocated string containing embedded file contents.
293 static char *read_string(pb_file *pbf, int size) {
294 char *page;
296 if((page = (char *) malloc(size + 1))) {
297 pb_read(pbf, page, size);
298 page[size] = '\0';
300 else {
301 fprintf(stderr, "Malloc of page buffer failed.\n");
302 fprintf(stderr, "Error: %s\n", strerror(errno));
303 exit(1);
306 return page;
/*
Function name: extract_line
args:	stream	NUL terminated string containing the full contents of a file,
				from which the line around a match is cut out.
		begin	Index into stream at which the regular expression match starts.
		end		Index into stream at which the match ends.
		b		Pointer that receives the index of the beginning of the
				returned line.  Used for the -b option.
purpose:	Create the line that jargrep prints for a match.  The line extends
from the match outwards, in both directions, until a control character other
than TAB is met.
returns:	Pointer to newly allocated string containing the line.  The caller
frees it.  Exits with status 1 if the allocation fails.
*/

static char *extract_line(const char *stream, regoff_t begin, regoff_t end, int *b) {
	int e;
	int length;
	char *retstr;

	/* Walk back to the start of the line.  TAB is line content, exactly as
	   in the forward scan below. */
	for(*b = begin;
	    *b >= 0 && (stream[*b] == '\t' || !iscntrl((unsigned char)stream[*b]));
	    (*b)--);
	(*b)++;
	/* Walk forward to the end of the line; the terminating NUL is a control
	   character, so this always stops inside the string. */
	for(e = end; stream[e] == '\t' || !iscntrl((unsigned char)stream[e]); e++);
	length = e - *b;
	/* An empty match sitting on a control character leaves *b one past e. */
	if(length < 0) length = 0;
	if((retstr = (char *) malloc(length + 1))) {
		/* The buffer holds the line only: nothing else may be formatted
		   into it, since a short line leaves no room for a prefix. */
		memcpy(retstr, &(stream[*b]), length);
		retstr[length] = '\0';
	}
	else {
		fprintf(stderr, "Malloc failed of output string.\n");
		fprintf(stderr, "Error: %s\n", strerror(errno));
		exit(1);
	}

	return retstr;
}
/*
Function name: chk_wrd
args:	exp		Pointer to compiled POSIX style regular expression of search target.
		str		String known to contain at least one match of exp.
purpose:	Verify that some occurrence of the regular expression in str stands as
a whole word and is not a substring of another word.  A word boundary is the
start or end of str, or any character that is neither alphanumeric nor '_'.
returns:	TRUE (non-zero) if it is a word, FALSE (zero) if it is a substring.
*/

static int chk_wrd(regex_t *exp, const char *str) {
	int wrd_fnd = 0;
	int frnt_ok;
	int bck_ok;
	const char *str2 = str;
	regmatch_t match;

	while(!wrd_fnd && !regexec(exp, str2, 1, &match, 0)) {
		const char *m_start = str2 + match.rm_so;
		const char *m_end = str2 + match.rm_eo;

		/* Front boundary: start of the whole string, or a non-word character
		   right before the match.  m_start[-1] is inside str whenever
		   m_start is not str itself. */
		if(m_start == str) frnt_ok = 1;
		else frnt_ok = !isalnum((unsigned char)m_start[-1]) && m_start[-1] != '_';

		/* Back boundary: end of string, or a non-word character. */
		bck_ok = (*m_end == '\0')
			|| (!isalnum((unsigned char)*m_end) && *m_end != '_');

		wrd_fnd = frnt_ok && bck_ok;

		/* Resume after this match.  A zero-length match at the current
		   position would otherwise be found again forever, so step over one
		   character, or stop at the end of the string. */
		if(match.rm_eo > 0) str2 = m_end;
		else if(*str2 != '\0') str2++;
		else break;
	}

	return wrd_fnd;
}
385 Function name: prnt_mtchs
386 args: exp Pointer to compiled POSIX style regular expression of search target.
387 filename String containing the name of the embedded file which matches have
388 been found in.
389 stream String containing the processed contents of the embedded jar file
390 represended with filename.
391 pmatch Array of regmatch_t matches into stream.
392 nl_offset Array of offsets of '\n' characters in stream. May be NULL if -n is
393 not set on command line.
394 num Number of matches in pmatch array.
395 lines Number of lines in file. Not set if -n is not set on command line.
396 options Bitwise flag containing flags set to represent the command line
397 options.
398 purpose: Control output of jargrep. Output is controlled by which options have been
399 set at the command line.
402 static void prnt_mtchs(regex_t *exp, const char *filename, const char *stream, regmatch_t *pmatch, regmatch_t *nl_offset, int num, int lines, int options) {
403 int i;
404 int j = 0;
405 int ln_cnt;
406 int begin;
407 int o_begin;
408 char *str;
410 o_begin = -1;
411 ln_cnt = 0;
412 for(i = 0; i < num; i++) {
413 str = extract_line(stream, pmatch[i].rm_so, pmatch[i].rm_eo, &begin);
414 if(begin > o_begin) {
415 if(!(options & JG_WORD_EXPRESSIONS) || chk_wrd(exp, str)) {
416 ln_cnt++;
417 if(!(options & JG_PRINT_COUNT)) {
418 printf("%s:", filename);
419 if(options & JG_PRINT_LINE_NUMBER) {
420 for(; j < lines && nl_offset[j].rm_so < begin; j++);
421 printf("%d:", j + 1);
423 if(options & JG_PRINT_BYTEOFFSET) printf("%d:", begin);
424 printf("%s\n", str);
428 o_begin = begin;
429 free(str);
431 if(options & JG_PRINT_COUNT) printf("%s:%d\n", filename, ln_cnt);
435 Function name: check_crc
436 args: pbf Pointer to pushback file pointer for jar file.
437 stream String containing the non modified contents fo the extraced file entry.
438 usize Size of file in bytes.
439 purpose: Verify the CRC matches that as what is stored in the jar file.
442 static void check_crc(pb_file *pbf, const char *stream, ub4 usize) {
443 ub4 crc;
444 ub4 lcrc;
445 ub1 scratch[16];
447 crc = crc32(crc, NULL, 0);
448 crc = crc32(crc, (const unsigned char *)stream, usize);
449 if(pb_read(pbf, scratch, 16) != 16) {
450 perror("read");
451 exit(1);
453 if(UNPACK_UB4(scratch, 0) != 0x08074b50) {
454 fprintf(stderr, "Error! Missing data descriptor!\n");
455 exit(1);
457 lcrc = UNPACK_UB4(scratch, 4);
458 if(crc != lcrc){
459 fprintf(stderr, "Error! CRCs do not match! Got %x, expected %x\n",
460 crc, lcrc);
461 exit(1);
/*
Function name: mk_ascii
args:	stream	Buffer that contains the contents of the extracted file entry.
		usize	Number of bytes in stream to process.
purpose:	Make certain that the contents of the file are ASCII, not binary.
Every control character except TAB, and every byte with a value of 128 or
above, is replaced in place by '\n'.  This permits grepping of binary files.
*/

static void mk_ascii(char *stream, size_t usize) {
	char *cur;

	for(cur = stream; usize > 0; cur++, usize--) {
		unsigned char ch = (unsigned char) *cur;

		/* Tabs are the one control character that is kept. */
		if(ch == '\t') continue;
		if(ch >= 128 || iscntrl(ch)) *cur = '\n';
	}
}
/*
Function name: fnd_match
args:	exp			Pointer to compiled POSIX style regular expression of search target.
		str_stream	String that contains the contents of the extracted file entry.
		i			Pointer to counter of matches; set to the number of entries
					in the returned array.
purpose:	Search str_stream for successive occurrences of the regular expression
exp and create an array of matches.  Offsets in the array are relative to the
start of str_stream.
returns:	Pointer to newly allocated array of regmatch_t which gives indexes to
start and end of matches.  NULL is returned upon no matches found.  Exits
with status 1 if the array cannot be grown.
*/

static regmatch_t *fnd_match(regex_t *exp, const char *str_stream, int *i) {
	regoff_t base = 0;	/* offset in str_stream where the next search starts */
	regmatch_t match;
	regmatch_t *match_array = NULL;
	regmatch_t *tmp;

	*i = 0;
	while(!regexec(exp, &(str_stream[base]), 1, &match, 0)) {
		tmp = (regmatch_t *) realloc(match_array, sizeof(regmatch_t) * ((*i) + 1));
		if(!tmp) {
			fprintf(stderr, "Realloc of match_array failed.\n");
			fprintf(stderr, "Error: %s\n", strerror(errno));
			exit(1);
		}
		match_array = tmp;

		/* regexec reports offsets relative to where the search began. */
		match.rm_so += base;
		match.rm_eo += base;
		match_array[(*i)++] = match;

		/* Continue after the match.  A zero-length match would be found
		   again at the same spot forever, so step over one character, or
		   stop once the end of the string has been reached. */
		if(match.rm_eo > match.rm_so) base = match.rm_eo;
		else if(str_stream[match.rm_eo] != '\0') base = match.rm_eo + 1;
		else break;
	}

	return match_array;
}
526 Function name: cont_grep
527 args: exp Pointer to compiled POSIX style regular expression of search target.
528 nl_exp Pointer to compiled POSIX style regular expression of newlines. This
529 argument is NULL unless the -n option is used on the command line.
530 fd File descriptor of the jar file being grepped.
531 pbf Pointer to pushback file style file stream. This is for use with
532 the pushback.c file io funtions.
533 options Bitwise flag containing flags set to represent the command line options.
534 purpose: This function handles single entries in an open jar file. The header is
535 read and then the embeded file is extracted and grepped.
536 returns: FALSE upon failure, TRUE otherwise.
539 static int cont_grep(regex_t *exp, regex_t *nl_exp, int fd, pb_file *pbf, int options) {
540 int retflag = TRUE;
541 int i;
542 int j;
543 ub4 csize;
544 ub4 usize;
545 ub2 fnlen;
546 ub2 eflen;
547 ub2 flags;
548 ub2 method;
549 ub1 file_header[30];
550 char *filename;
551 char *str_stream;
552 regmatch_t *match_array;
553 regmatch_t *nl_offsets;
555 if(pb_read(pbf, (file_header + 4), 26) != 26) {
556 perror("read");
557 retflag = FALSE;
559 else {
560 decd_siz(&csize, &usize, &fnlen, &eflen, &flags, &method, file_header);
561 filename = new_filename(pbf, fnlen);
562 lseek(fd, eflen, SEEK_CUR);
563 if(filename[fnlen - 1] != '/') {
564 str_stream = (method == 8 || (flags & 0x0008)) ?
565 (char *) inflate_string(pbf, &csize, &usize) :
566 read_string(pbf, csize);
567 if(flags & 0x008) check_crc(pbf, str_stream, usize);
568 mk_ascii(str_stream, usize);
569 match_array = fnd_match(exp, str_stream, &i);
570 if((options & JG_PRINT_LINE_NUMBER) && i)
571 nl_offsets = fnd_match(nl_exp, str_stream, &j);
572 prnt_mtchs(exp, filename, str_stream, match_array, nl_offsets, i, j, options);
573 if(match_array) free(match_array);
574 free(str_stream);
576 free(filename);
577 retflag = TRUE;
580 return retflag;
584 Funtion name: jargrep
585 args: exp Pointer to compiled POSIX style regular expression of search target.
586 nl_exp Pointer to compiled regular expression for newlines or NULL. Only set
587 if -n option is present at command line.
588 jarfile Filename of jar file to be searched.
589 options Bitwise flag containing flags set to represent the command line options.
590 purpose: Open jar file. Check signatures. When right signature is found go to deeper
591 grep routine.
594 static void jargrep(regex_t *exp, regex_t *nl_exp, const char *jarfile, int options){
595 int fd;
596 int floop = TRUE;
597 pb_file pbf;
598 ub1 scratch[16];
600 if((fd = open(jarfile, O_RDONLY)) == -1) {
601 if(!(options & JG_SUPRESS_ERROR))
602 fprintf(stderr, "Error reading file '%s': %s\n", jarfile, strerror(errno));
604 else {
605 pb_init(&pbf, fd);
607 do {
608 if(pb_read(&pbf, scratch, 4) != 4) {
609 perror("read");
610 floop = FALSE;
612 else {
613 switch (check_sig(scratch, &pbf)) {
614 case 0:
615 floop = cont_grep(exp, nl_exp, fd, &pbf, options);
616 break;
617 case 1:
618 floop = FALSE;
619 break;
620 case 2:
621 /* fall through continue */
625 } while(floop);
630 Funtion Name: main
631 args: argc number of in coming args.
632 argv array of strings.
633 purpose: Entry point of the program. Parse command line arguments and set options.
634 Set up regular expressions. Call grep routines for each file as input.
635 returns: 1 on error 0 on success.
638 int main(int argc, char **argv) {
639 int c;
640 int retval = 0;
641 int fileindex;
642 int options = 0;
643 regex_t *regexp;
644 regex_t *nl_exp = NULL;
645 char *regexpstr = NULL;
647 while((c = getopt(argc, argv, "bce:insVw")) != -1) {
648 switch(c) {
649 case 'b':
650 options |= JG_PRINT_BYTEOFFSET;
651 break;
652 case 'c':
653 options |= JG_PRINT_COUNT;
654 break;
655 case 'e':
656 if(!(regexpstr = (char *) malloc(strlen(optarg) + 1))) {
657 fprintf(stderr, "Malloc failure.\n");
658 fprintf(stderr, "Error: %s\n", strerror(errno));
659 exit(1);
661 strcpy(regexpstr, optarg);
662 break;
663 case 'i':
664 options |= JG_IGNORE_CASE;
665 break;
666 case 'n':
667 options |= JG_PRINT_LINE_NUMBER;
668 break;
669 case 's':
670 options |= JG_SUPRESS_ERROR;
671 break;
672 case 'v':
673 options |= JG_INVERT;
674 break;
675 case 'V':
676 printf("%s\n", GVERSION);
677 exit(0);
678 case 'w':
679 options |= JG_WORD_EXPRESSIONS;
680 break;
681 default:
682 fprintf(stderr, "Unknown option -%c\n", c);
683 fprintf(stderr, Usage, argv[0]);
684 exit(1);
687 if(!regexpstr){
688 if(((argc - optind) >= 2)) {
689 regexpstr = argv[optind];
690 fileindex = optind + 1;
692 else {
693 fprintf(stderr, "Invalid arguments.\n");
694 fprintf(stderr, Usage, argv[0]);
695 exit(1);
698 else if((argc - optind) == 1) {
699 fileindex = optind;
701 else {
702 fprintf(stderr, "Invalid arguments.\n");
703 fprintf(stderr, Usage, argv[0]);
704 exit(1);
707 if(opt_valid(options)) {
708 regexp = create_regexp(regexpstr, options);
709 if(options & JG_PRINT_LINE_NUMBER) nl_exp = create_regexp("\n", 0);
710 init_inflation();
711 for(; fileindex < argc; fileindex++)
712 jargrep(regexp, nl_exp, argv[fileindex], options);
713 regfree(regexp);
714 if(options & JG_PRINT_LINE_NUMBER) regfree(nl_exp);
716 else {
717 retval = 1;
718 fprintf(stderr, "Error: Invalid combination of options.\n");
721 return retval;