mySQL 5.0.11 sources for tomato
[tomato.git] / release / src / router / mysql / extra / replace.c
blob35739a60dcacfa698d9e21a6387018483f360ca5
1 /*
2 Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; version 2 of
7 the License.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 Replace strings in textfile
22 This program replaces strings in files or from stdin to stdout.
23 It accepts a list of from-string/to-string pairs and replaces
24 each occurrence of a from-string with the corresponding to-string.
25 The first occurrence of a found string is matched. If there is more
26 than one possibility for the string to replace, longer matches
27 are preferred before shorter matches.
29 Special characters in from string:
30 \^ Match start of line.
31 \$ Match end of line.
32 \b Match space-character, start of line or end of line.
33 For an end \b the next replace starts looking at the end space-character.
34 An \b alone or in a string matches only a space-character.
35 \r, \t, \v as in C.
36 The programs make a DFA-state-machine of the strings and the speed isn't
37 dependent on the count of replace-strings (only of the number of replaces).
38 A line is assumed ending with \n or \0.
39 There is no limit except memory on the length of strings.
41 Written by Monty.
42 fill_buffer_retaining() is taken from gnu-grep and modified.
45 #define DONT_USE_RAID
46 #include <my_global.h>
47 #include <m_ctype.h>
48 #include <my_sys.h>
49 #include <m_string.h>
50 #include <errno.h>
52 #define PC_MALLOC 256 /* Bytes for pointers */
53 #define PS_MALLOC 512 /* Bytes for data */
55 typedef struct st_pointer_array { /* when using array-strings */
56 TYPELIB typelib; /* Pointer to strings */
57 uchar *str; /* Strings is here */
58 int7 *flag; /* Flag about each var. */
59 uint array_allocs,max_count,length,max_length;
60 } POINTER_ARRAY;
/*
  Pseudo-character codes used while building the state machine.  They are
  numbered directly after the 256 real byte values so that they can share
  one index space with ordinary characters.
*/
#define SPACE_CHAR      256                     /* \b */
#define START_OF_LINE   (SPACE_CHAR + 1)        /* \^ */
#define END_OF_LINE     (START_OF_LINE + 1)     /* \$ */
#define LAST_CHAR_CODE  (END_OF_LINE + 1)       /* Number of codes in total */
67 typedef struct st_replace {
68 my_bool found;
69 struct st_replace *next[256];
70 } REPLACE;
72 typedef struct st_replace_found {
73 my_bool found;
74 char *replace_string;
75 uint to_offset;
76 int from_offset;
77 } REPLACE_STRING;
79 #ifndef WORD_BIT
80 #define WORD_BIT (8*sizeof(uint))
81 #endif
83 /* functions defined in this file */
85 static int static_get_options(int *argc,char * * *argv);
86 static int get_replace_strings(int *argc,char * * *argv,
87 POINTER_ARRAY *from_array,
88 POINTER_ARRAY *to_array);
89 static int insert_pointer_name(POINTER_ARRAY *pa, char * name);
90 static void free_pointer_array(POINTER_ARRAY *pa);
91 static int convert_pipe(REPLACE *,FILE *,FILE *);
92 static int convert_file(REPLACE *, char *);
93 static REPLACE *init_replace(char * *from, char * *to,uint count,
94 char * word_end_chars);
95 static uint replace_strings(REPLACE *rep, char * *start,uint *max_length,
96 char * from);
97 static int initialize_buffer(void);
98 static void reset_buffer(void);
99 static void free_buffer(void);
101 static int silent=0,verbose=0,updated=0;
103 /* The main program */
105 int main(int argc, char *argv[])
107 int i,error;
108 char word_end_chars[256],*pos;
109 POINTER_ARRAY from,to;
110 REPLACE *replace;
111 MY_INIT(argv[0]);
113 if (static_get_options(&argc,&argv))
114 exit(1);
115 if (get_replace_strings(&argc,&argv,&from,&to))
116 exit(1);
118 for (i=1,pos=word_end_chars ; i < 256 ; i++)
119 if (my_isspace(&my_charset_latin1,i))
120 *pos++= (char) i;
121 *pos=0;
122 if (!(replace=init_replace((char**) from.typelib.type_names,
123 (char**) to.typelib.type_names,
124 (uint) from.typelib.count,word_end_chars)))
125 exit(1);
126 free_pointer_array(&from);
127 free_pointer_array(&to);
128 if (initialize_buffer())
129 return 1;
131 error=0;
132 if (argc == 0)
133 error=convert_pipe(replace,stdin,stdout);
134 else
136 while (argc--)
138 error=convert_file(replace,*(argv++));
141 free_buffer();
142 my_end(verbose ? MY_CHECK_ERROR | MY_GIVE_INFO : MY_CHECK_ERROR);
143 exit(error ? 2 : 0);
144 return 0; /* No compiler warning */
145 } /* main */
148 /* reads options */
149 /* Initiates DEBUG - but no debugging here ! */
151 static int static_get_options(argc,argv)
152 register int *argc;
153 register char **argv[];
155 int help,version;
156 char *pos;
158 silent=verbose=help=0;
160 while (--*argc > 0 && *(pos = *(++*argv)) == '-' && pos[1] != '-') {
161 while (*++pos)
163 version=0;
164 switch((*pos)) {
165 case 's':
166 silent=1;
167 break;
168 case 'v':
169 verbose=1;
170 break;
171 case '#':
172 DBUG_PUSH (++pos);
173 pos= (char*) " "; /* Skip rest of arguments */
174 break;
175 case 'V':
176 version=1;
177 case 'I':
178 case '?':
179 help=1; /* Help text written */
180 printf("%s Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE,
181 MACHINE_TYPE);
182 if (version)
183 break;
184 puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
185 puts("This program replaces strings in files or from stdin to stdout.\n"
186 "It accepts a list of from-string/to-string pairs and replaces\n"
187 "each occurrence of a from-string with the corresponding to-string.\n"
188 "The first occurrence of a found string is matched. If there is\n"
189 "more than one possibility for the string to replace, longer\n"
190 "matches are preferred before shorter matches.\n\n"
191 "A from-string can contain these special characters:\n"
192 " \\^ Match start of line.\n"
193 " \\$ Match end of line.\n"
194 " \\b Match space-character, start of line or end of line.\n"
195 " For a end \\b the next replace starts locking at the end\n"
196 " space-character. A \\b alone in a string matches only a\n"
197 " space-character.\n");
198 printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname);
199 puts("or");
200 printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname);
201 puts("");
202 puts("Options: -? or -I \"Info\" -s \"silent\" -v \"verbose\"");
203 break;
204 default:
205 fprintf(stderr,"illegal option: -%c\n",*pos);
206 break;
210 if (*argc == 0)
212 if (!help)
213 my_message(0,"No replace options given",MYF(ME_BELL));
214 exit(0); /* Don't use as pipe */
216 return(0);
217 } /* static_get_options */
220 static int get_replace_strings(argc,argv,from_array,to_array)
221 register int *argc;
222 register char **argv[];
223 POINTER_ARRAY *from_array,*to_array;
225 char *pos;
227 bzero((char*) from_array,sizeof(from_array[0]));
228 bzero((char*) to_array,sizeof(to_array[0]));
229 while (*argc > 0 && (*(pos = *(*argv)) != '-' || pos[1] != '-' || pos[2]))
231 insert_pointer_name(from_array,pos);
232 (*argc)--;
233 (*argv)++;
234 if (!*argc || !strcmp(**argv,"--"))
236 my_message(0,"No to-string for last from-string",MYF(ME_BELL));
237 return 1;
239 insert_pointer_name(to_array,**argv);
240 (*argc)--;
241 (*argv)++;
243 if (*argc)
244 { /* Skip "--" argument */
245 (*argc)--;
246 (*argv)++;
248 return 0;
251 static int insert_pointer_name(reg1 POINTER_ARRAY *pa,char * name)
253 uint i,length,old_count;
254 uchar *new_pos;
255 const char **new_array;
256 DBUG_ENTER("insert_pointer_name");
258 if (! pa->typelib.count)
260 if (!(pa->typelib.type_names=(const char **)
261 my_malloc(((PC_MALLOC-MALLOC_OVERHEAD)/
262 (sizeof(char *)+sizeof(*pa->flag))*
263 (sizeof(char *)+sizeof(*pa->flag))),MYF(MY_WME))))
264 DBUG_RETURN(-1);
265 if (!(pa->str= (uchar*) my_malloc((uint) (PS_MALLOC-MALLOC_OVERHEAD),
266 MYF(MY_WME))))
268 my_free((uchar*) pa->typelib.type_names,MYF(0));
269 DBUG_RETURN (-1);
271 pa->max_count=(PC_MALLOC-MALLOC_OVERHEAD)/(sizeof(uchar*)+
272 sizeof(*pa->flag));
273 pa->flag= (int7*) (pa->typelib.type_names+pa->max_count);
274 pa->length=0;
275 pa->max_length=PS_MALLOC-MALLOC_OVERHEAD;
276 pa->array_allocs=1;
278 length=(uint) strlen(name)+1;
279 if (pa->length+length >= pa->max_length)
281 pa->max_length=(pa->length+length+MALLOC_OVERHEAD+PS_MALLOC-1)/PS_MALLOC;
282 pa->max_length=pa->max_length*PS_MALLOC-MALLOC_OVERHEAD;
283 if (!(new_pos= (uchar*) my_realloc((uchar*) pa->str,
284 (uint) pa->max_length,
285 MYF(MY_WME))))
286 DBUG_RETURN(1);
287 if (new_pos != pa->str)
289 my_ptrdiff_t diff=PTR_BYTE_DIFF(new_pos,pa->str);
290 for (i=0 ; i < pa->typelib.count ; i++)
291 pa->typelib.type_names[i]= ADD_TO_PTR(pa->typelib.type_names[i],diff,
292 char*);
293 pa->str=new_pos;
296 if (pa->typelib.count >= pa->max_count-1)
298 int len;
299 pa->array_allocs++;
300 len=(PC_MALLOC*pa->array_allocs - MALLOC_OVERHEAD);
301 if (!(new_array=(const char **) my_realloc((uchar*) pa->typelib.type_names,
302 (uint) len/
303 (sizeof(uchar*)+sizeof(*pa->flag))*
304 (sizeof(uchar*)+sizeof(*pa->flag)),
305 MYF(MY_WME))))
306 DBUG_RETURN(1);
307 pa->typelib.type_names=new_array;
308 old_count=pa->max_count;
309 pa->max_count=len/(sizeof(uchar*) + sizeof(*pa->flag));
310 pa->flag= (int7*) (pa->typelib.type_names+pa->max_count);
311 memcpy((uchar*) pa->flag,(char *) (pa->typelib.type_names+old_count),
312 old_count*sizeof(*pa->flag));
314 pa->flag[pa->typelib.count]=0; /* Reset flag */
315 pa->typelib.type_names[pa->typelib.count++]= (char*) (pa->str+pa->length);
316 pa->typelib.type_names[pa->typelib.count]= NullS; /* Put end-mark */
317 VOID(strmov((char*) pa->str + pa->length, name));
318 pa->length+=length;
319 DBUG_RETURN(0);
320 } /* insert_pointer_name */
323 /* free pointer array */
325 static void free_pointer_array(reg1 POINTER_ARRAY *pa)
327 if (pa->typelib.count)
329 pa->typelib.count=0;
330 my_free((uchar*) pa->typelib.type_names,MYF(0));
331 pa->typelib.type_names=0;
332 my_free((uchar*) pa->str,MYF(0));
334 return;
335 } /* free_pointer_array */
338 /* Code for replace rutines */
340 #define SET_MALLOC_HUNC 64
342 typedef struct st_rep_set {
343 uint *bits; /* Pointer to used sets */
344 short next[LAST_CHAR_CODE]; /* Pointer to next sets */
345 uint found_len; /* Best match to date */
346 int found_offset;
347 uint table_offset;
348 uint size_of_bits; /* For convinience */
349 } REP_SET;
351 typedef struct st_rep_sets {
352 uint count; /* Number of sets */
353 uint extra; /* Extra sets in buffer */
354 uint invisible; /* Sets not chown */
355 uint size_of_bits;
356 REP_SET *set,*set_buffer;
357 uint *bit_buffer;
358 } REP_SETS;
360 typedef struct st_found_set {
361 uint table_offset;
362 int found_offset;
363 } FOUND_SET;
365 typedef struct st_follow {
366 int chr;
367 uint table_offset;
368 uint len;
369 } FOLLOWS;
372 static int init_sets(REP_SETS *sets,uint states);
373 static REP_SET *make_new_set(REP_SETS *sets);
374 static void make_sets_invisible(REP_SETS *sets);
375 static void free_last_set(REP_SETS *sets);
376 static void free_sets(REP_SETS *sets);
377 static void internal_set_bit(REP_SET *set, uint bit);
378 static void internal_clear_bit(REP_SET *set, uint bit);
379 static void or_bits(REP_SET *to,REP_SET *from);
380 static void copy_bits(REP_SET *to,REP_SET *from);
381 static int cmp_bits(REP_SET *set1,REP_SET *set2);
382 static int get_next_bit(REP_SET *set,uint lastpos);
383 static short find_set(REP_SETS *sets,REP_SET *find);
384 static short find_found(FOUND_SET *found_set,uint table_offset,
385 int found_offset);
386 static uint start_at_word(char * pos);
387 static uint end_of_word(char * pos);
388 static uint replace_len(char * pos);
390 static uint found_sets=0;
393 /* Init a replace structure for further calls */
395 static REPLACE *init_replace(char * *from, char * *to,uint count,
396 char * word_end_chars)
398 uint i,j,states,set_nr,len,result_len,max_length,found_end,bits_set,bit_nr;
399 int used_sets,chr;
400 short default_state;
401 char used_chars[LAST_CHAR_CODE],is_word_end[256];
402 char * pos, *to_pos, **to_array;
403 REP_SETS sets;
404 REP_SET *set,*start_states,*word_states,*new_set;
405 FOLLOWS *follow,*follow_ptr;
406 REPLACE *replace;
407 FOUND_SET *found_set;
408 REPLACE_STRING *rep_str;
409 DBUG_ENTER("init_replace");
411 /* Count number of states */
412 for (i=result_len=max_length=0 , states=2 ; i < count ; i++)
414 len=replace_len(from[i]);
415 if (!len)
417 errno=EINVAL;
418 my_message(0,"No to-string for last from-string",MYF(ME_BELL));
419 DBUG_RETURN(0);
421 states+=len+1;
422 result_len+=(uint) strlen(to[i])+1;
423 if (len > max_length)
424 max_length=len;
426 bzero((char*) is_word_end,sizeof(is_word_end));
427 for (i=0 ; word_end_chars[i] ; i++)
428 is_word_end[(uchar) word_end_chars[i]]=1;
430 if (init_sets(&sets,states))
431 DBUG_RETURN(0);
432 found_sets=0;
433 if (!(found_set= (FOUND_SET*) my_malloc(sizeof(FOUND_SET)*max_length*count,
434 MYF(MY_WME))))
436 free_sets(&sets);
437 DBUG_RETURN(0);
439 VOID(make_new_set(&sets)); /* Set starting set */
440 make_sets_invisible(&sets); /* Hide previus sets */
441 used_sets=-1;
442 word_states=make_new_set(&sets); /* Start of new word */
443 start_states=make_new_set(&sets); /* This is first state */
444 if (!(follow=(FOLLOWS*) my_malloc((states+2)*sizeof(FOLLOWS),MYF(MY_WME))))
446 free_sets(&sets);
447 my_free((uchar*) found_set,MYF(0));
448 DBUG_RETURN(0);
451 /* Init follow_ptr[] */
452 for (i=0, states=1, follow_ptr=follow+1 ; i < count ; i++)
454 if (from[i][0] == '\\' && from[i][1] == '^')
456 internal_set_bit(start_states,states+1);
457 if (!from[i][2])
459 start_states->table_offset=i;
460 start_states->found_offset=1;
463 else if (from[i][0] == '\\' && from[i][1] == '$')
465 internal_set_bit(start_states,states);
466 internal_set_bit(word_states,states);
467 if (!from[i][2] && start_states->table_offset == (uint) ~0)
469 start_states->table_offset=i;
470 start_states->found_offset=0;
473 else
475 internal_set_bit(word_states,states);
476 if (from[i][0] == '\\' && (from[i][1] == 'b' && from[i][2]))
477 internal_set_bit(start_states,states+1);
478 else
479 internal_set_bit(start_states,states);
481 for (pos=from[i], len=0; *pos ; pos++)
483 if (*pos == '\\' && *(pos+1))
485 pos++;
486 switch (*pos) {
487 case 'b':
488 follow_ptr->chr = SPACE_CHAR;
489 break;
490 case '^':
491 follow_ptr->chr = START_OF_LINE;
492 break;
493 case '$':
494 follow_ptr->chr = END_OF_LINE;
495 break;
496 case 'r':
497 follow_ptr->chr = '\r';
498 break;
499 case 't':
500 follow_ptr->chr = '\t';
501 break;
502 case 'v':
503 follow_ptr->chr = '\v';
504 break;
505 default:
506 follow_ptr->chr = (uchar) *pos;
507 break;
510 else
511 follow_ptr->chr= (uchar) *pos;
512 follow_ptr->table_offset=i;
513 follow_ptr->len= ++len;
514 follow_ptr++;
516 follow_ptr->chr=0;
517 follow_ptr->table_offset=i;
518 follow_ptr->len=len;
519 follow_ptr++;
520 states+=(uint) len+1;
524 for (set_nr=0,pos=0 ; set_nr < sets.count ; set_nr++)
526 set=sets.set+set_nr;
527 default_state= 0; /* Start from beginning */
529 /* If end of found-string not found or start-set with current set */
531 for (i= (uint) ~0; (i=get_next_bit(set,i)) ;)
533 if (!follow[i].chr)
535 if (! default_state)
536 default_state= find_found(found_set,set->table_offset,
537 set->found_offset+1);
540 copy_bits(sets.set+used_sets,set); /* Save set for changes */
541 if (!default_state)
542 or_bits(sets.set+used_sets,sets.set); /* Can restart from start */
544 /* Find all chars that follows current sets */
545 bzero((char*) used_chars,sizeof(used_chars));
546 for (i= (uint) ~0; (i=get_next_bit(sets.set+used_sets,i)) ;)
548 used_chars[follow[i].chr]=1;
549 if ((follow[i].chr == SPACE_CHAR && !follow[i+1].chr &&
550 follow[i].len > 1) || follow[i].chr == END_OF_LINE)
551 used_chars[0]=1;
554 /* Mark word_chars used if \b is in state */
555 if (used_chars[SPACE_CHAR])
556 for (pos= word_end_chars ; *pos ; pos++)
557 used_chars[(int) (uchar) *pos] = 1;
559 /* Handle other used characters */
560 for (chr= 0 ; chr < 256 ; chr++)
562 if (! used_chars[chr])
563 set->next[chr]= (short) (chr ? default_state : -1);
564 else
566 new_set=make_new_set(&sets);
567 set=sets.set+set_nr; /* if realloc */
568 new_set->table_offset=set->table_offset;
569 new_set->found_len=set->found_len;
570 new_set->found_offset=set->found_offset+1;
571 found_end=0;
573 for (i= (uint) ~0 ; (i=get_next_bit(sets.set+used_sets,i)) ; )
575 if (!follow[i].chr || follow[i].chr == chr ||
576 (follow[i].chr == SPACE_CHAR &&
577 (is_word_end[chr] ||
578 (!chr && follow[i].len > 1 && ! follow[i+1].chr))) ||
579 (follow[i].chr == END_OF_LINE && ! chr))
581 if ((! chr || (follow[i].chr && !follow[i+1].chr)) &&
582 follow[i].len > found_end)
583 found_end=follow[i].len;
584 if (chr && follow[i].chr)
585 internal_set_bit(new_set,i+1); /* To next set */
586 else
587 internal_set_bit(new_set,i);
590 if (found_end)
592 new_set->found_len=0; /* Set for testing if first */
593 bits_set=0;
594 for (i= (uint) ~0; (i=get_next_bit(new_set,i)) ;)
596 if ((follow[i].chr == SPACE_CHAR ||
597 follow[i].chr == END_OF_LINE) && ! chr)
598 bit_nr=i+1;
599 else
600 bit_nr=i;
601 if (follow[bit_nr-1].len < found_end ||
602 (new_set->found_len &&
603 (chr == 0 || !follow[bit_nr].chr)))
604 internal_clear_bit(new_set,i);
605 else
607 if (chr == 0 || !follow[bit_nr].chr)
608 { /* best match */
609 new_set->table_offset=follow[bit_nr].table_offset;
610 if (chr || (follow[i].chr == SPACE_CHAR ||
611 follow[i].chr == END_OF_LINE))
612 new_set->found_offset=found_end; /* New match */
613 new_set->found_len=found_end;
615 bits_set++;
618 if (bits_set == 1)
620 set->next[chr] = find_found(found_set,
621 new_set->table_offset,
622 new_set->found_offset);
623 free_last_set(&sets);
625 else
626 set->next[chr] = find_set(&sets,new_set);
628 else
629 set->next[chr] = find_set(&sets,new_set);
634 /* Alloc replace structure for the replace-state-machine */
636 if ((replace=(REPLACE*) my_malloc(sizeof(REPLACE)*(sets.count)+
637 sizeof(REPLACE_STRING)*(found_sets+1)+
638 sizeof(char *)*count+result_len,
639 MYF(MY_WME | MY_ZEROFILL))))
641 rep_str=(REPLACE_STRING*) (replace+sets.count);
642 to_array=(char **) (rep_str+found_sets+1);
643 to_pos=(char *) (to_array+count);
644 for (i=0 ; i < count ; i++)
646 to_array[i]=to_pos;
647 to_pos=strmov(to_pos,to[i])+1;
649 rep_str[0].found=1;
650 rep_str[0].replace_string=0;
651 for (i=1 ; i <= found_sets ; i++)
653 pos=from[found_set[i-1].table_offset];
654 rep_str[i].found= (my_bool) (!memcmp(pos,"\\^",3) ? 2 : 1);
655 rep_str[i].replace_string=to_array[found_set[i-1].table_offset];
656 rep_str[i].to_offset=found_set[i-1].found_offset-start_at_word(pos);
657 rep_str[i].from_offset=found_set[i-1].found_offset-replace_len(pos)+
658 end_of_word(pos);
660 for (i=0 ; i < sets.count ; i++)
662 for (j=0 ; j < 256 ; j++)
663 if (sets.set[i].next[j] >= 0)
664 replace[i].next[j]=replace+sets.set[i].next[j];
665 else
666 replace[i].next[j]=(REPLACE*) (rep_str+(-sets.set[i].next[j]-1));
669 my_free((uchar*) follow,MYF(0));
670 free_sets(&sets);
671 my_free((uchar*) found_set,MYF(0));
672 DBUG_PRINT("exit",("Replace table has %d states",sets.count));
673 DBUG_RETURN(replace);
677 static int init_sets(REP_SETS *sets,uint states)
679 bzero((char*) sets,sizeof(*sets));
680 sets->size_of_bits=((states+7)/8);
681 if (!(sets->set_buffer=(REP_SET*) my_malloc(sizeof(REP_SET)*SET_MALLOC_HUNC,
682 MYF(MY_WME))))
683 return 1;
684 if (!(sets->bit_buffer=(uint*) my_malloc(sizeof(uint)*sets->size_of_bits*
685 SET_MALLOC_HUNC,MYF(MY_WME))))
687 my_free((uchar*) sets->set,MYF(0));
688 return 1;
690 return 0;
693 /* Make help sets invisible for nicer codeing */
695 static void make_sets_invisible(REP_SETS *sets)
697 sets->invisible=sets->count;
698 sets->set+=sets->count;
699 sets->count=0;
702 static REP_SET *make_new_set(REP_SETS *sets)
704 uint i,count,*bit_buffer;
705 REP_SET *set;
706 if (sets->extra)
708 sets->extra--;
709 set=sets->set+ sets->count++;
710 bzero((char*) set->bits,sizeof(uint)*sets->size_of_bits);
711 bzero((char*) &set->next[0],sizeof(set->next[0])*LAST_CHAR_CODE);
712 set->found_offset=0;
713 set->found_len=0;
714 set->table_offset= (uint) ~0;
715 set->size_of_bits=sets->size_of_bits;
716 return set;
718 count=sets->count+sets->invisible+SET_MALLOC_HUNC;
719 if (!(set=(REP_SET*) my_realloc((uchar*) sets->set_buffer,
720 sizeof(REP_SET)*count,
721 MYF(MY_WME))))
722 return 0;
723 sets->set_buffer=set;
724 sets->set=set+sets->invisible;
725 if (!(bit_buffer=(uint*) my_realloc((uchar*) sets->bit_buffer,
726 (sizeof(uint)*sets->size_of_bits)*count,
727 MYF(MY_WME))))
728 return 0;
729 sets->bit_buffer=bit_buffer;
730 for (i=0 ; i < count ; i++)
732 sets->set_buffer[i].bits=bit_buffer;
733 bit_buffer+=sets->size_of_bits;
735 sets->extra=SET_MALLOC_HUNC;
736 return make_new_set(sets);
739 static void free_last_set(REP_SETS *sets)
741 sets->count--;
742 sets->extra++;
743 return;
746 static void free_sets(REP_SETS *sets)
748 my_free((uchar*)sets->set_buffer,MYF(0));
749 my_free((uchar*)sets->bit_buffer,MYF(0));
750 return;
753 static void internal_set_bit(REP_SET *set, uint bit)
755 set->bits[bit / WORD_BIT] |= 1 << (bit % WORD_BIT);
756 return;
759 static void internal_clear_bit(REP_SET *set, uint bit)
761 set->bits[bit / WORD_BIT] &= ~ (1 << (bit % WORD_BIT));
762 return;
766 static void or_bits(REP_SET *to,REP_SET *from)
768 reg1 uint i;
769 for (i=0 ; i < to->size_of_bits ; i++)
770 to->bits[i]|=from->bits[i];
771 return;
774 static void copy_bits(REP_SET *to,REP_SET *from)
776 memcpy((uchar*) to->bits,(uchar*) from->bits,
777 (size_t) (sizeof(uint) * to->size_of_bits));
780 static int cmp_bits(REP_SET *set1,REP_SET *set2)
782 return memcmp(set1->bits, set2->bits,
783 sizeof(uint) * set1->size_of_bits);
787 /* Get next set bit from set. */
789 static int get_next_bit(REP_SET *set,uint lastpos)
791 uint pos,*start,*end,bits;
793 start=set->bits+ ((lastpos+1) / WORD_BIT);
794 end=set->bits + set->size_of_bits;
795 bits=start[0] & ~((1 << ((lastpos+1) % WORD_BIT)) -1);
797 while (! bits && ++start < end)
798 bits=start[0];
799 if (!bits)
800 return 0;
801 pos=(uint) (start-set->bits)*WORD_BIT;
802 while (! (bits & 1))
804 bits>>=1;
805 pos++;
807 return pos;
810 /* find if there is a same set in sets. If there is, use it and
811 free given set, else put in given set in sets and return it's
812 position */
814 static short find_set(REP_SETS *sets,REP_SET *find)
816 uint i;
817 for (i=0 ; i < sets->count-1 ; i++)
819 if (!cmp_bits(sets->set+i,find))
821 free_last_set(sets);
822 return (short) i;
825 return (short) i; /* return new position */
830 find if there is a found_set with same table_offset & found_offset
831 If there is return offset to it, else add new offset and return pos.
832 Pos returned is -offset-2 in found_set_structure because it's is
833 saved in set->next and set->next[] >= 0 points to next set and
834 set->next[] == -1 is reserved for end without replaces.
837 static short find_found(FOUND_SET *found_set,uint table_offset,
838 int found_offset)
840 int i;
841 for (i=0 ; (uint) i < found_sets ; i++)
842 if (found_set[i].table_offset == table_offset &&
843 found_set[i].found_offset == found_offset)
844 return (short) (-i-2);
845 found_set[i].table_offset=table_offset;
846 found_set[i].found_offset=found_offset;
847 found_sets++;
848 return (short) (-i-2); /* return new position */
851 /* Return 1 if regexp starts with \b or ends with \b*/
853 static uint start_at_word(char * pos)
855 return (((!memcmp(pos,"\\b",2) && pos[2]) || !memcmp(pos,"\\^",2)) ? 1 : 0);
858 static uint end_of_word(char * pos)
860 char * end=strend(pos);
861 return ((end > pos+2 && !memcmp(end-2,"\\b",2)) ||
862 (end >= pos+2 && !memcmp(end-2,"\\$",2))) ?
863 1 : 0;
867 static uint replace_len(char * str)
869 uint len=0;
870 while (*str)
872 if (str[0] == '\\' && str[1])
873 str++;
874 str++;
875 len++;
877 return len;
881 /* The actual loop */
883 static uint replace_strings(REPLACE *rep, char **start, uint *max_length,
884 char *from)
886 reg1 REPLACE *rep_pos;
887 reg2 REPLACE_STRING *rep_str;
888 char *to, *end, *pos, *new;
890 end=(to= *start) + *max_length-1;
891 rep_pos=rep+1;
892 for(;;)
894 while (!rep_pos->found)
896 rep_pos= rep_pos->next[(uchar) *from];
897 if (to == end)
899 (*max_length)+=8192;
900 if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
901 return (uint) -1;
902 to=new+(to - *start);
903 end=(*start=new)+ *max_length-1;
905 *to++= *from++;
907 if (!(rep_str = ((REPLACE_STRING*) rep_pos))->replace_string)
908 return (uint) (to - *start)-1;
909 updated=1; /* Some char * is replaced */
910 to-=rep_str->to_offset;
911 for (pos=rep_str->replace_string; *pos ; pos++)
913 if (to == end)
915 (*max_length)*=2;
916 if (!(new=my_realloc(*start,*max_length,MYF(MY_WME))))
917 return (uint) -1;
918 to=new+(to - *start);
919 end=(*start=new)+ *max_length-1;
921 *to++= *pos;
923 if (!*(from-=rep_str->from_offset) && rep_pos->found != 2)
924 return (uint) (to - *start);
925 rep_pos=rep;
929 static char *buffer; /* The buffer itself, grown as needed. */
930 static int bufbytes; /* Number of bytes in the buffer. */
931 static int bufread,my_eof; /* Number of bytes to get with each read(). */
932 static uint bufalloc;
933 static char *out_buff;
934 static uint out_length;
936 static int initialize_buffer()
938 bufread = 8192;
939 bufalloc = bufread + bufread / 2;
940 if (!(buffer = my_malloc(bufalloc+1,MYF(MY_WME))))
941 return 1;
942 bufbytes=my_eof=0;
943 out_length=bufread;
944 if (!(out_buff=my_malloc(out_length,MYF(MY_WME))))
945 return(1);
946 return 0;
949 static void reset_buffer()
951 bufbytes=my_eof=0;
954 static void free_buffer()
956 my_free(buffer,MYF(MY_WME));
957 my_free(out_buff,MYF(MY_WME));
962 Fill the buffer retaining the last n bytes at the beginning of the
963 newly filled buffer (for backward context). Returns the number of new
964 bytes read from disk.
967 static int fill_buffer_retaining(fd,n)
968 File fd;
969 int n;
971 int i;
973 /* See if we need to grow the buffer. */
974 if ((int) bufalloc - n <= bufread)
976 while ((int) bufalloc - n <= bufread)
978 bufalloc *= 2;
979 bufread *= 2;
981 buffer = my_realloc(buffer, bufalloc+1, MYF(MY_WME));
982 if (! buffer)
983 return(-1);
986 /* Shift stuff down. */
987 bmove(buffer,buffer+bufbytes-n,(uint) n);
988 bufbytes = n;
990 if (my_eof)
991 return 0;
993 /* Read in new stuff. */
994 if ((i=(int) my_read(fd, (uchar*) buffer + bufbytes,
995 (size_t) bufread, MYF(MY_WME))) < 0)
996 return -1;
998 /* Kludge to pretend every nonempty file ends with a newline. */
999 if (i == 0 && bufbytes > 0 && buffer[bufbytes - 1] != '\n')
1001 my_eof = i = 1;
1002 buffer[bufbytes] = '\n';
1005 bufbytes += i;
1006 return i;
1009 /* Return 0 if convert is ok */
1010 /* Global variable update is set if something was changed */
1012 static int convert_pipe(rep,in,out)
1013 REPLACE *rep;
1014 FILE *in,*out;
1016 int retain,error;
1017 uint length;
1018 char save_char,*end_of_line,*start_of_line;
1019 DBUG_ENTER("convert_pipe");
1021 updated=retain=0;
1022 reset_buffer();
1024 while ((error=fill_buffer_retaining(fileno(in),retain)) > 0)
1026 end_of_line=buffer ;
1027 buffer[bufbytes]=0; /* Sentinel */
1028 for (;;)
1030 start_of_line=end_of_line;
1031 while (end_of_line[0] != '\n' && end_of_line[0])
1032 end_of_line++;
1033 if (end_of_line == buffer+bufbytes)
1035 retain= (int) (end_of_line - start_of_line);
1036 break; /* No end of line, read more */
1038 save_char=end_of_line[0];
1039 end_of_line[0]=0;
1040 end_of_line++;
1041 if ((length=replace_strings(rep,&out_buff,&out_length,start_of_line)) ==
1042 (uint) -1)
1043 return 1;
1044 if (!my_eof)
1045 out_buff[length++]=save_char; /* Don't write added newline */
1046 if (my_fwrite(out, (uchar*) out_buff, length, MYF(MY_WME | MY_NABP)))
1047 DBUG_RETURN(1);
1050 DBUG_RETURN(error);
1054 static int convert_file(REPLACE *rep, char * name)
1056 int error;
1057 FILE *in,*out;
1058 char dir_buff[FN_REFLEN], tempname[FN_REFLEN], *org_name = name;
1059 #ifdef HAVE_READLINK
1060 char link_name[FN_REFLEN];
1061 #endif
1062 File temp_file;
1063 size_t dir_buff_length;
1064 DBUG_ENTER("convert_file");
1066 /* check if name is a symlink */
1067 #ifdef HAVE_READLINK
1068 org_name= (!my_disable_symlinks &&
1069 !my_readlink(link_name, name, MYF(0))) ? link_name : name;
1070 #endif
1071 if (!(in= my_fopen(org_name,O_RDONLY,MYF(MY_WME))))
1072 DBUG_RETURN(1);
1073 dirname_part(dir_buff, org_name, &dir_buff_length);
1074 if ((temp_file= create_temp_file(tempname, dir_buff, "PR", O_WRONLY,
1075 MYF(MY_WME))) < 0)
1077 my_fclose(in,MYF(0));
1078 DBUG_RETURN(1);
1080 if (!(out= my_fdopen(temp_file, tempname, O_WRONLY, MYF(MY_WME))))
1082 my_fclose(in,MYF(0));
1083 DBUG_RETURN(1);
1086 error=convert_pipe(rep,in,out);
1087 my_fclose(in,MYF(0)); my_fclose(out,MYF(0));
1089 if (updated && ! error)
1090 my_redel(org_name,tempname,MYF(MY_WME | MY_LINK_WARNING));
1091 else
1092 my_delete(tempname,MYF(MY_WME));
1093 if (!silent && ! error)
1095 if (updated)
1096 printf("%s converted\n",name);
1097 else if (verbose)
1098 printf("%s left unchanged\n",name);
1100 DBUG_RETURN(error);