* xref.c (FILE_NAME_ABSOLUTE_P): Add parenthesis.
[official-gcc.git] / fastjar / jargrep.c
bloba1ea4bf322568d300cbccd854617aa3d4ddd7b78
/*
  jargrep.c - main functions for jargrep utility
  Copyright (C) 1999, 2000 Bryan Burns
  Copyright (C) 2000 Cory Hollingsworth

  Parts of this program are based on Bryan Burns' work with fastjar
  Copyright (C) 1999.

  This program is free software; you can redistribute it and/or
  modify it under the terms of the GNU General Public License
  as published by the Free Software Foundation; either version 2
  of the License, or (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
*/
24 /* $Id: jargrep.c,v 1.4 2000/12/15 18:45:09 tromey Exp $
26 $Log: jargrep.c,v $
27 Revision 1.4 2000/12/15 18:45:09 tromey
28 * jargrep.c: Include getopt.h if it exists.
29 (optind): Declare.
30 * configure, config.h: Rebuilt.
31 * configure.in: Check for getopt.h.
33 Revision 1.3 2000/12/14 18:45:35 ghazi
34 Warning fixes:
36 * compress.c: Include stdlib.h and compress.h.
37 (rcsid): Delete.
38 (report_str_error): Make static.
39 (ez_inflate_str): Delete unused variable. Add parens in if-stmt.
40 (hrd_inflate_str): Likewise.
42 * compress.h (init_compression, end_compression, init_inflation,
43 end_inflation): Prototype void arguments.
45 * dostime.c (rcsid): Delete.
47 * jargrep.c: Include ctype.h, stdlib.h, zlib.h and compress.h.
48 Make functions static. Cast ctype function argument to `unsigned
49 char'. Add parens in if-stmts. Constify.
50 (Usage): Change into a macro.
51 (jargrep): Remove unused parameter.
53 * jartool.c: Constify. Add parens in if-stmts. Align
54 signed/unsigned char pointers in functions calls using casts.
55 (rcsid): Delete.
56 (list_jar): Fix printf format specifier.
57 (usage): Chop long string into bits. Reformat.
59 * pushback.c (rcsid): Delete.
61 Revision 1.2 2000/12/11 02:59:55 apbianco
62 2000-12-10 Robert Lipe <robertlipe@usa.net>
64 * jargrep.c (jargrep): Added null statement after case.
66 2000-12-10 Alexandre Petit-Bianco <apbianco@cygnus.com>
68 * Makefile: Removed.
69 * Makefile.in: Rebuilt with `-i' and `--enable-foreign'.
71 (http://gcc.gnu.org/ml/gcc/2000-12/msg00294.html)
73 Revision 1.1 2000/12/09 03:08:23 apbianco
74 2000-12-08 Alexandre Petit-Bianco <apbianco@cygnus.com>
76 * fastjar: Imported.
78 Revision 1.8 2000/09/13 14:02:02 cory
79 Reformatted some of the code to more closly match the layout of the orriginal
80 fastjar utility.
82 Revision 1.7 2000/09/12 22:29:36 cory
83 Jargrep now seems to do what I want it to do. Performs properly on Linux x86,
84 will test some other platforms later.
89 #include "config.h"
90 #include <stdio.h>
91 #include <unistd.h>
92 #include <regex.h>
93 #include <errno.h>
94 #include <string.h>
95 #include <sys/types.h>
96 #include <sys/stat.h>
97 #include <fcntl.h>
98 #include <ctype.h>
99 #ifdef HAVE_STDLIB_H
100 #include <stdlib.h>
101 #endif
102 #include "jargrep.h"
103 #include "jartool.h"
104 #include "pushback.h"
105 #include "zipfile.h"
106 #include "zlib.h"
107 #include "compress.h"
108 #ifdef HAVE_GETOPT_H
109 #include <getopt.h>
110 #endif
/* printf-style usage banner; the single %s receives the program name. */
#define Usage "Usage: %s [-bcinsw] <-e regexp | regexp> file(s)\n"

/* getopt(3) state, declared here for systems whose headers do not. */
extern char *optarg;
extern int optind;
/*
Function name: opt_valid
arg:      options  Bitfield flag that contains the command line options of grepjar.
purpose:  To guard against the occurrence of certain incompatible flags being used
          together.
returns:  TRUE if options are valid, FALSE otherwise.
*/
125 static int opt_valid(int options) {
126 int retflag;
128 if((options & JG_PRINT_COUNT) &&
129 (options & (JG_PRINT_BYTEOFFSET | JG_PRINT_LINE_NUMBER)))
131 retflag = FALSE;
133 else retflag = TRUE;
135 return retflag;
/*
Function name: create_regexp
args:     regstr   String containing the uncompiled regular expression.  This may be the
                   expression as it is passed in through argv.
          options  This is the flag containing the command line options that have been
                   parsed by getopt.
purpose:  Handle the error handling involved with setting up a new regular
          expression.
returns:  Newly allocated compiled regular expression ready to be used in a regexec call.
*/
149 static regex_t *create_regexp(const char *regstr, int options) {
150 regex_t *exp;
151 int errcode;
152 int msgsize;
153 char *errmsg;
155 if((exp = (regex_t *) malloc(sizeof(regex_t))))
157 if((errcode = regcomp(exp, regstr, (options & JG_IGNORE_CASE) ? REG_ICASE : 0))) {
158 fprintf(stderr, "regcomp of regex failed,\n");
159 if((errmsg = (char *) malloc(msgsize = regerror(errcode, exp, NULL, 0) + 1))) {
160 regerror(errcode, exp, errmsg, msgsize);
161 fprintf(stderr, "Error: %s\n", errmsg);
162 free(exp);
163 free(errmsg);
164 exit(1);
166 else {
167 fprintf(stderr, "Malloc of errmsg failed.\n");
168 fprintf(stderr, "Error: %s\n", strerror(errno));
169 free(exp);
170 exit(1);
174 else {
175 fprintf(stderr, "Malloc of regex failed,\n");
176 fprintf(stderr, "Error: %s\n", strerror(errno));
177 exit(1);
180 return exp;
/*
Function name: check_sig
args:     scratch  Pointer to array of bytes containing signature.
          pbfp     Pointer to push back handle for jar file.
purpose:  Verify that the signature is one we know how to handle.
returns:  0, 1, or 2.  0 means we are ready to read embedded file information.  1 means
          we have read beyond the embedded file list and can exit knowing we have read all
          the relevant information.  2 means we still haven't reached the embedded file
          list and need to do some more reading.
*/
193 static int check_sig(ub1 *scratch, pb_file *pbfp) {
194 ub4 signature;
195 int retflag = 0;
197 signature = UNPACK_UB4(scratch, 0);
199 #ifdef DEBUG
200 printf("signature is %x\n", signature);
201 #endif
202 if(signature == 0x08074b50){
203 #ifdef DEBUG
204 printf("skipping data descriptor\n");
205 #endif
206 pb_read(pbfp, scratch, 12);
207 retflag = 2;
208 } else if(signature == 0x02014b50){
209 #ifdef DEBUG
210 printf("Central header reached.. we're all done!\n");
211 #endif
212 retflag = 1;
213 }else if(signature != 0x04034b50){
214 printf("Ick! %#x\n", signature);
215 retflag = 1;
218 return retflag;
/*
Function name: decd_siz
args:     csize        Pointer to embedded file's compressed size.
          usize        Pointer to embedded file's uncompressed size.
          fnlen        Pointer to embedded file's file name length.
          eflen        Pointer to length of extra fields in jar file.
          flags        Pointer to bitmapped flags.
          method       Pointer to indicator of storage method of embedded file.
          file_header  Pointer to string containing the above values to be unpacked.
purpose:  Unpack the series of values from file_header.
*/
233 static void decd_siz(ub4 *csize, ub4 *usize, ub2 *fnlen, ub2 *eflen, ub2 *flags, ub2 *method, ub1 *file_header) {
234 *csize = UNPACK_UB4(file_header, LOC_CSIZE);
235 #ifdef DEBUG
236 printf("Compressed size is %u\n", *csize);
237 #endif
239 *usize = UNPACK_UB4(file_header, LOC_USIZE);
240 #ifdef DEBUG
241 printf("Uncompressed size is %u\n", *usize);
242 #endif
244 *fnlen = UNPACK_UB2(file_header, LOC_FNLEN);
245 #ifdef DEBUG
246 printf("Filename length is %hu\n", *fnlen);
247 #endif
249 *eflen = UNPACK_UB2(file_header, LOC_EFLEN);
250 #ifdef DEBUG
251 printf("Extra field length is %hu\n", *eflen);
252 #endif
254 *flags = UNPACK_UB2(file_header, LOC_EXTRA);
255 #ifdef DEBUG
256 printf("Flags are %#hx\n", *flags);
257 #endif
259 *method = UNPACK_UB2(file_header, LOC_COMP);
260 #ifdef DEBUG
261 printf("Compression method is %#hx\n", *method);
262 #endif
/*
Function name: new_filename
args:     pbf  Pointer to push back file handle.  Used for reading input file.
          len  Length of file name to be read.
purpose:  Read in the embedded file name from jar file.
returns:  Pointer to newly allocated string containing file name.
*/
274 static char *new_filename(pb_file *pbf, ub4 len) {
275 char *filename;
277 if(!(filename = (char *) malloc(len + 1))) {
278 fprintf(stderr, "Malloc failed of filename\n");
279 fprintf(stderr, "Error: %s\n", strerror(errno));
281 pb_read(pbf, filename, len);
282 filename[len] = '\0';
284 #ifdef DEBUG
285 printf("filename is %s\n", filename);
286 #endif
288 return filename;
/*
Function name: read_string
args:     pbf   Pointer to push back file handle.  Used for reading input file.
          size  Size of embedded file in bytes.
purpose:  Create a string containing the contents of the embedded noncompressed file.
returns:  Pointer to newly allocated string containing embedded file contents.
*/
299 static char *read_string(pb_file *pbf, int size) {
300 char *page;
302 if((page = (char *) malloc(size + 1))) {
303 pb_read(pbf, page, size);
304 page[size] = '\0';
306 else {
307 fprintf(stderr, "Malloc of page buffer failed.\n");
308 fprintf(stderr, "Error: %s\n", strerror(errno));
309 exit(1);
312 return page;
/*
Function name: extract_line
args:     stream  String containing the full contents of a file, from which the line
                  representing our grep output is cut.
          begin   Index into stream where the regular expression match begins.
          end     Index into stream where the regular expression match ends.
          b       Pointer to the index of what will be the beginning of the line when
                  string is returned.  Used for -b option.
purpose:  Create a string that can be printed by jargrep from the long string stream.
          The matching line that is printed out by jargrep is generated by this function.
returns:  Pointer to newly allocated string containing the line around the match.
*/
/*
 * Copy out the line of stream that surrounds the match [begin, end).
 *
 * stream: NUL-terminated text being searched.
 * begin:  offset of the first byte of the match.
 * end:    offset one past the last byte of the match.
 * b:      out; receives the offset in stream at which the returned line starts.
 *
 * The line extends left from begin until a control character (or the start
 * of stream) and right from end until a control character other than TAB.
 * The terminating NUL counts as a control character, so the scan stops there.
 *
 * Returns a heap-allocated, NUL-terminated copy that the caller must free().
 * Allocation failure is reported on stderr and terminates with exit(1).
 *
 * NOTE(review): the leftward scan stops at TAB while the rightward scan
 * skips it; kept as-is because changing it alters the printed output.
 */
static char *extract_line(const char *stream, regoff_t begin, regoff_t end, int *b) {
	int e;
	int length;
	char *retstr;

	for (*b = begin; *b >= 0 && !iscntrl((unsigned char) stream[*b]); (*b)--)
		;
	(*b)++;

	for (e = end; stream[e] == '\t' || !iscntrl((unsigned char) stream[e]); e++)
		;

	/*
	 * An empty match sitting on a control character leaves *b == begin + 1
	 * and e == end, i.e. a length of -1.  Clamp it so the result is an empty
	 * line instead of a copy with a huge unsigned length.
	 */
	length = e - *b;
	if (length < 0)
		length = 0;

	retstr = malloc((size_t) length + 1);
	if (retstr == NULL) {
		int saved_errno = errno;

		fprintf(stderr, "Malloc failed of output string.\n");
		fprintf(stderr, "Error: %s\n", strerror(saved_errno));
		exit(1);
	}

	/*
	 * The old code first sprintf'd "<offset>:" into this buffer, which
	 * overran it whenever the line was shorter than the printed offset, and
	 * then overwrote the result anyway.  Only the copy is needed.
	 */
	memcpy(retstr, stream + *b, (size_t) length);
	retstr[length] = '\0';

	return retstr;
}
/*
Function name: chk_wrd
args:     exp  Pointer to compiled POSIX style regular expression of search target.
          str  String known to contain at least one match of exp.
purpose:  Verify that the occurrence of the regular expression in str occurs as a whole
          word and not a substring of another word.
returns:  TRUE if it is a word, FALSE if it is a substring.
*/
/*
 * Decide whether exp matches str as a whole word.
 *
 * exp: compiled regular expression.
 * str: NUL-terminated line to examine.
 *
 * A match counts as a word when the character before it (if any) and the
 * character after it (if any) are neither alphanumeric nor '_'.  Matches are
 * tried left to right, each search resuming where the previous match ended.
 *
 * Returns 1 as soon as one match qualifies, 0 when none does.
 */
static int chk_wrd(regex_t *exp, const char *str) {
	const char *cursor = str;
	regmatch_t match;

	while (regexec(exp, cursor, 1, &match, 0) == 0) {
		int front_ok;
		int back_ok;
		unsigned char next;

		if (match.rm_so == 0 && cursor == str) {
			/* Match starts the line: nothing precedes it. */
			front_ok = 1;
		}
		else {
			/* cursor > str whenever rm_so is 0 here, so index -1 is in bounds. */
			unsigned char prev = (unsigned char) cursor[match.rm_so - 1];

			front_ok = !isalnum(prev) && prev != '_';
		}

		next = (unsigned char) cursor[match.rm_eo];
		back_ok = (next == '\0') || (!isalnum(next) && next != '_');

		if (front_ok && back_ok)
			return 1;

		/*
		 * Resume after this match.  A match that ends at the cursor is an
		 * empty match and would be found again forever, so step over one
		 * character instead, or stop at the end of the line.
		 */
		if (match.rm_eo > 0)
			cursor += match.rm_eo;
		else if (*cursor != '\0')
			cursor++;
		else
			break;
	}

	return 0;
}
/*
Function name: prnt_mtchs
args:     exp        Pointer to compiled POSIX style regular expression of search target.
          filename   String containing the name of the embedded file which matches have
                     been found in.
          stream     String containing the processed contents of the embedded jar file
                     represented by filename.
          pmatch     Array of regmatch_t matches into stream.
          nl_offset  Array of offsets of '\n' characters in stream.  May be NULL if -n is
                     not set on command line.
          num        Number of matches in pmatch array.
          lines      Number of lines in file.  Not set if -n is not set on command line.
          options    Bitwise flag containing flags set to represent the command line
                     options.
purpose:  Control output of jargrep.  Output is controlled by which options have been
          set at the command line.
*/
408 static void prnt_mtchs(regex_t *exp, const char *filename, const char *stream, regmatch_t *pmatch, regmatch_t *nl_offset, int num, int lines, int options) {
409 int i;
410 int j = 0;
411 int ln_cnt;
412 int begin;
413 int o_begin;
414 char *str;
416 o_begin = -1;
417 ln_cnt = 0;
418 for(i = 0; i < num; i++) {
419 str = extract_line(stream, pmatch[i].rm_so, pmatch[i].rm_eo, &begin);
420 if(begin > o_begin) {
421 if(!(options & JG_WORD_EXPRESSIONS) || chk_wrd(exp, str)) {
422 ln_cnt++;
423 if(!(options & JG_PRINT_COUNT)) {
424 printf("%s:", filename);
425 if(options & JG_PRINT_LINE_NUMBER) {
426 for(; j < lines && nl_offset[j].rm_so < begin; j++);
427 printf("%d:", j + 1);
429 if(options & JG_PRINT_BYTEOFFSET) printf("%d:", begin);
430 printf("%s\n", str);
434 o_begin = begin;
435 free(str);
437 if(options & JG_PRINT_COUNT) printf("%s:%d\n", filename, ln_cnt);
/*
Function name: check_crc
args:     pbf     Pointer to pushback file pointer for jar file.
          stream  String containing the unmodified contents of the extracted file entry.
          usize   Size of file in bytes.
purpose:  Verify that the CRC matches the one stored in the jar file.
*/
448 static void check_crc(pb_file *pbf, const char *stream, ub4 usize) {
449 ub4 crc=0;
450 ub4 lcrc;
451 ub1 scratch[16];
453 crc = crc32(crc, NULL, 0);
454 crc = crc32(crc, (const unsigned char *)stream, usize);
455 if(pb_read(pbf, scratch, 16) != 16) {
456 perror("read");
457 exit(1);
459 if(UNPACK_UB4(scratch, 0) != 0x08074b50) {
460 fprintf(stderr, "Error! Missing data descriptor!\n");
461 exit(1);
463 lcrc = UNPACK_UB4(scratch, 4);
464 if(crc != lcrc){
465 fprintf(stderr, "Error! CRCs do not match! Got %x, expected %x\n",
466 crc, lcrc);
467 exit(1);
/*
Function name: mk_ascii
args:     stream  String that contains the contents of the extracted file entry.
          usize   String size.
purpose:  Make certain that the contents of the file are ASCII, not binary.  This
          permits grepping of binary files as well by converting non-ASCII and control
          characters into '\n'.
*/
/*
 * Make a buffer safe to treat as lines of text.
 *
 * stream: buffer to rewrite in place.
 * usize:  number of bytes to process.
 *
 * Every byte that is a control character or has a value of 128 or above is
 * replaced by '\n'.  TAB is the one exception and is left untouched.
 */
static void mk_ascii(char *stream, size_t usize) {
	while (usize-- > 0) {
		unsigned char ch = (unsigned char) *stream;

		if (ch != '\t' && (ch >= 128 || iscntrl(ch)))
			*stream = '\n';
		stream++;
	}
}
/*
Function name: fnd_match
args:     exp         Pointer to compiled POSIX style regular expression of search target.
          str_stream  String that contains the contents of the extracted file entry.
          i           Pointer to counter and index of matches.
purpose:  Search str_stream for occurrences of the regular expression exp and create
          an array of matches.
returns:  Pointer to newly allocated array of regmatch_t which gives indexes to start
          and end of matches.  NULL is returned upon no matches found.
*/
/*
 * Collect every match of exp in str_stream, left to right.
 *
 * exp:        compiled regular expression.
 * str_stream: NUL-terminated text to search.
 * i:          out; receives the number of matches stored.
 *
 * Each search resumes where the previous match ended.  The stored rm_so and
 * rm_eo are offsets from the start of str_stream, not from the resume point.
 *
 * Returns a heap-allocated array of *i elements that the caller must free(),
 * or NULL when there is no match.  Allocation failure is reported on stderr
 * and terminates the process with exit(1).
 */
static regmatch_t *fnd_match(regex_t *exp, const char *str_stream, int *i) {
	regmatch_t match;
	regmatch_t *match_array = NULL;
	regmatch_t *tmp;
	regoff_t base = 0;	/* offset in str_stream where the next search starts */

	*i = 0;
	while (regexec(exp, str_stream + base, 1, &match, 0) == 0) {
		tmp = realloc(match_array, sizeof *match_array * ((size_t) *i + 1));
		if (tmp == NULL) {
			int saved_errno = errno;

			fprintf(stderr, "Realloc of match_array failed.\n");
			fprintf(stderr, "Error: %s\n", strerror(saved_errno));
			exit(1);
		}
		match_array = tmp;

		/* regexec reports offsets relative to the pointer it was given. */
		match.rm_so += base;
		match.rm_eo += base;
		match_array[*i] = match;
		(*i)++;

		/*
		 * A match that ends where the search began is an empty match at the
		 * resume point and would repeat forever, growing the array without
		 * bound.  Step over one character, or stop at the end of the text.
		 */
		if (match.rm_eo > base)
			base = match.rm_eo;
		else if (str_stream[base] != '\0')
			base++;
		else
			break;
	}

	return match_array;
}
/*
Function name: cont_grep
args:     exp      Pointer to compiled POSIX style regular expression of search target.
          nl_exp   Pointer to compiled POSIX style regular expression of newlines.  This
                   argument is NULL unless the -n option is used on the command line.
          fd       File descriptor of the jar file being grepped.
          pbf      Pointer to pushback file style file stream.  This is for use with
                   the pushback.c file io functions.
          options  Bitwise flag containing flags set to represent the command line options.
purpose:  This function handles single entries in an open jar file.  The header is
          read and then the embedded file is extracted and grepped.
returns:  FALSE upon failure, TRUE otherwise.
*/
545 static int cont_grep(regex_t *exp, regex_t *nl_exp, int fd, pb_file *pbf, int options) {
546 int retflag = TRUE;
547 int i;
548 int j;
549 ub4 csize;
550 ub4 usize;
551 ub2 fnlen;
552 ub2 eflen;
553 ub2 flags;
554 ub2 method;
555 ub1 file_header[30];
556 char *filename;
557 char *str_stream;
558 regmatch_t *match_array;
559 regmatch_t *nl_offsets=0;
561 if(pb_read(pbf, (file_header + 4), 26) != 26) {
562 perror("read");
563 retflag = FALSE;
565 else {
566 decd_siz(&csize, &usize, &fnlen, &eflen, &flags, &method, file_header);
567 filename = new_filename(pbf, fnlen);
568 lseek(fd, eflen, SEEK_CUR);
569 if(filename[fnlen - 1] != '/') {
570 str_stream = (method == 8 || (flags & 0x0008)) ?
571 (char *) inflate_string(pbf, &csize, &usize) :
572 read_string(pbf, csize);
573 if(flags & 0x008) check_crc(pbf, str_stream, usize);
574 mk_ascii(str_stream, usize);
575 match_array = fnd_match(exp, str_stream, &i);
576 if((options & JG_PRINT_LINE_NUMBER) && i)
577 nl_offsets = fnd_match(nl_exp, str_stream, &j);
578 prnt_mtchs(exp, filename, str_stream, match_array, nl_offsets, i, j, options);
579 if(match_array) free(match_array);
580 free(str_stream);
582 free(filename);
583 retflag = TRUE;
586 return retflag;
/*
Function name: jargrep
args:     exp      Pointer to compiled POSIX style regular expression of search target.
          nl_exp   Pointer to compiled regular expression for newlines or NULL.  Only set
                   if -n option is present at command line.
          jarfile  Filename of jar file to be searched.
          options  Bitwise flag containing flags set to represent the command line options.
purpose:  Open jar file.  Check signatures.  When the right signature is found go to the
          deeper grep routine.
*/
600 static void jargrep(regex_t *exp, regex_t *nl_exp, const char *jarfile, int options){
601 int fd;
602 int floop = TRUE;
603 pb_file pbf;
604 ub1 scratch[16];
606 if((fd = open(jarfile, O_RDONLY)) == -1) {
607 if(!(options & JG_SUPRESS_ERROR))
608 fprintf(stderr, "Error reading file '%s': %s\n", jarfile, strerror(errno));
610 else {
611 pb_init(&pbf, fd);
613 do {
614 if(pb_read(&pbf, scratch, 4) != 4) {
615 perror("read");
616 floop = FALSE;
618 else {
619 switch (check_sig(scratch, &pbf)) {
620 case 0:
621 floop = cont_grep(exp, nl_exp, fd, &pbf, options);
622 break;
623 case 1:
624 floop = FALSE;
625 break;
626 case 2:
627 /* fall through continue */
631 } while(floop);
/*
Function name: main
args:     argc  Number of incoming args.
          argv  Array of strings.
purpose:  Entry point of the program.  Parse command line arguments and set options.
          Set up regular expressions.  Call grep routines for each file as input.
returns:  1 on error, 0 on success.
*/
644 int main(int argc, char **argv) {
645 int c;
646 int retval = 0;
647 int fileindex;
648 int options = 0;
649 regex_t *regexp;
650 regex_t *nl_exp = NULL;
651 char *regexpstr = NULL;
653 while((c = getopt(argc, argv, "bce:insVw")) != -1) {
654 switch(c) {
655 case 'b':
656 options |= JG_PRINT_BYTEOFFSET;
657 break;
658 case 'c':
659 options |= JG_PRINT_COUNT;
660 break;
661 case 'e':
662 if(!(regexpstr = (char *) malloc(strlen(optarg) + 1))) {
663 fprintf(stderr, "Malloc failure.\n");
664 fprintf(stderr, "Error: %s\n", strerror(errno));
665 exit(1);
667 strcpy(regexpstr, optarg);
668 break;
669 case 'i':
670 options |= JG_IGNORE_CASE;
671 break;
672 case 'n':
673 options |= JG_PRINT_LINE_NUMBER;
674 break;
675 case 's':
676 options |= JG_SUPRESS_ERROR;
677 break;
678 case 'v':
679 options |= JG_INVERT;
680 break;
681 case 'V':
682 printf("%s\n", GVERSION);
683 exit(0);
684 case 'w':
685 options |= JG_WORD_EXPRESSIONS;
686 break;
687 default:
688 fprintf(stderr, "Unknown option -%c\n", c);
689 fprintf(stderr, Usage, argv[0]);
690 exit(1);
693 if(!regexpstr){
694 if(((argc - optind) >= 2)) {
695 regexpstr = argv[optind];
696 fileindex = optind + 1;
698 else {
699 fprintf(stderr, "Invalid arguments.\n");
700 fprintf(stderr, Usage, argv[0]);
701 exit(1);
704 else if((argc - optind) == 1) {
705 fileindex = optind;
707 else {
708 fprintf(stderr, "Invalid arguments.\n");
709 fprintf(stderr, Usage, argv[0]);
710 exit(1);
713 if(opt_valid(options)) {
714 regexp = create_regexp(regexpstr, options);
715 if(options & JG_PRINT_LINE_NUMBER) nl_exp = create_regexp("\n", 0);
716 init_inflation();
717 for(; fileindex < argc; fileindex++)
718 jargrep(regexp, nl_exp, argv[fileindex], options);
719 regfree(regexp);
720 if(options & JG_PRINT_LINE_NUMBER) regfree(nl_exp);
722 else {
723 retval = 1;
724 fprintf(stderr, "Error: Invalid combination of options.\n");
727 return retval;