2 Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
4 This program is free software; you can redistribute it and/or
5 modify it under the terms of the GNU General Public License
6 as published by the Free Software Foundation; version 2 of
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 Replace strings in textfile
22 This program replaces strings in files or from stdin to stdout.
23 It accepts a list of from-string/to-string pairs and replaces
24 each occurrence of a from-string with the corresponding to-string.
25 The first occurrence of a found string is matched. If there is more
26 than one possibility for the string to replace, longer matches
27 are preferred before shorter matches.
29 Special characters in from string:
30 \^ Match start of line.
32 \b Match space-character, start of line or end of line.
33 For an end \b the next replace starts looking at the end space-character.
34 An \b alone or in a string matches only a space-character.
36 The program builds a DFA state machine from the strings, so its speed isn't
37 dependent on the count of replace-strings (only on the number of replaces).
38 A line is assumed ending with \n or \0.
39 There is no limit except memory on the length of strings.
42 fill_buffer_retaining() is taken from gnu-grep and modified.
46 #include <my_global.h>
52 #define PC_MALLOC 256 /* Bytes for pointers */
53 #define PS_MALLOC 512 /* Bytes for data */
/*
  Growable array of NUL-terminated strings. It is filled one string at a
  time by insert_pointer_name() and released by free_pointer_array().
*/
55 typedef struct st_pointer_array
{ /* when using array-strings */
56 TYPELIB typelib
; /* typelib.type_names[] points at each string, typelib.count counts them */
57 uchar
*str
; /* The string data itself is stored here */
58 int7
*flag
; /* One flag per string; set to 0 when the string is inserted */
59 uint array_allocs
,max_count
,length
,max_length
; /* max_count: slots in type_names; length: offset in str where the next
     string is written; max_length: size limit checked before str is grown */
62 #define SPACE_CHAR 256
63 #define START_OF_LINE 257
64 #define END_OF_LINE 258
65 #define LAST_CHAR_CODE 259
67 typedef struct st_replace
{
69 struct st_replace
*next
[256];
72 typedef struct st_replace_found
{
80 #define WORD_BIT (8*sizeof(uint))
83 /* functions defined in this file */
85 static int static_get_options(int *argc
,char * * *argv
);
86 static int get_replace_strings(int *argc
,char * * *argv
,
87 POINTER_ARRAY
*from_array
,
88 POINTER_ARRAY
*to_array
);
89 static int insert_pointer_name(POINTER_ARRAY
*pa
, char * name
);
90 static void free_pointer_array(POINTER_ARRAY
*pa
);
91 static int convert_pipe(REPLACE
*,FILE *,FILE *);
92 static int convert_file(REPLACE
*, char *);
93 static REPLACE
*init_replace(char * *from
, char * *to
,uint count
,
94 char * word_end_chars
);
95 static uint
replace_strings(REPLACE
*rep
, char * *start
,uint
*max_length
,
97 static int initialize_buffer(void);
98 static void reset_buffer(void);
99 static void free_buffer(void);
101 static int silent
=0,verbose
=0,updated
=0;
103 /* The main program */
105 int main(int argc
, char *argv
[])
108 char word_end_chars
[256],*pos
;
109 POINTER_ARRAY from
,to
;
113 if (static_get_options(&argc
,&argv
))
115 if (get_replace_strings(&argc
,&argv
,&from
,&to
))
118 for (i
=1,pos
=word_end_chars
; i
< 256 ; i
++)
119 if (my_isspace(&my_charset_latin1
,i
))
122 if (!(replace
=init_replace((char**) from
.typelib
.type_names
,
123 (char**) to
.typelib
.type_names
,
124 (uint
) from
.typelib
.count
,word_end_chars
)))
126 free_pointer_array(&from
);
127 free_pointer_array(&to
);
128 if (initialize_buffer())
133 error
=convert_pipe(replace
,stdin
,stdout
);
138 error
=convert_file(replace
,*(argv
++));
142 my_end(verbose
? MY_CHECK_ERROR
| MY_GIVE_INFO
: MY_CHECK_ERROR
);
144 return 0; /* No compiler warning */
149 /* Initiates DEBUG - but no debugging here ! */
151 static int static_get_options(argc
,argv
)
153 register char **argv
[];
158 silent
=verbose
=help
=0;
160 while (--*argc
> 0 && *(pos
= *(++*argv
)) == '-' && pos
[1] != '-') {
173 pos
= (char*) " "; /* Skip rest of arguments */
179 help
=1; /* Help text written */
180 printf("%s Ver 1.4 for %s at %s\n",my_progname
,SYSTEM_TYPE
,
184 puts("This software comes with ABSOLUTELY NO WARRANTY. This is free software,\nand you are welcome to modify and redistribute it under the GPL license\n");
185 puts("This program replaces strings in files or from stdin to stdout.\n"
186 "It accepts a list of from-string/to-string pairs and replaces\n"
187 "each occurrence of a from-string with the corresponding to-string.\n"
188 "The first occurrence of a found string is matched. If there is\n"
189 "more than one possibility for the string to replace, longer\n"
190 "matches are preferred before shorter matches.\n\n"
191 "A from-string can contain these special characters:\n"
192 " \\^ Match start of line.\n"
193 " \\$ Match end of line.\n"
194 " \\b Match space-character, start of line or end of line.\n"
195 " For a end \\b the next replace starts locking at the end\n"
196 " space-character. A \\b alone in a string matches only a\n"
197 " space-character.\n");
198 printf("Usage: %s [-?svIV] from to from to ... -- [files]\n", my_progname
);
200 printf("Usage: %s [-?svIV] from to from to ... < fromfile > tofile\n", my_progname
);
202 puts("Options: -? or -I \"Info\" -s \"silent\" -v \"verbose\"");
205 fprintf(stderr
,"illegal option: -%c\n",*pos
);
213 my_message(0,"No replace options given",MYF(ME_BELL
));
214 exit(0); /* Don't use as pipe */
217 } /* static_get_options */
220 static int get_replace_strings(argc
,argv
,from_array
,to_array
)
222 register char **argv
[];
223 POINTER_ARRAY
*from_array
,*to_array
;
227 bzero((char*) from_array
,sizeof(from_array
[0]));
228 bzero((char*) to_array
,sizeof(to_array
[0]));
229 while (*argc
> 0 && (*(pos
= *(*argv
)) != '-' || pos
[1] != '-' || pos
[2]))
231 insert_pointer_name(from_array
,pos
);
234 if (!*argc
|| !strcmp(**argv
,"--"))
236 my_message(0,"No to-string for last from-string",MYF(ME_BELL
));
239 insert_pointer_name(to_array
,**argv
);
244 { /* Skip "--" argument */
251 static int insert_pointer_name(reg1 POINTER_ARRAY
*pa
,char * name
)
253 uint i
,length
,old_count
;
255 const char **new_array
;
256 DBUG_ENTER("insert_pointer_name");
258 if (! pa
->typelib
.count
)
260 if (!(pa
->typelib
.type_names
=(const char **)
261 my_malloc(((PC_MALLOC
-MALLOC_OVERHEAD
)/
262 (sizeof(char *)+sizeof(*pa
->flag
))*
263 (sizeof(char *)+sizeof(*pa
->flag
))),MYF(MY_WME
))))
265 if (!(pa
->str
= (uchar
*) my_malloc((uint
) (PS_MALLOC
-MALLOC_OVERHEAD
),
268 my_free((uchar
*) pa
->typelib
.type_names
,MYF(0));
271 pa
->max_count
=(PC_MALLOC
-MALLOC_OVERHEAD
)/(sizeof(uchar
*)+
273 pa
->flag
= (int7
*) (pa
->typelib
.type_names
+pa
->max_count
);
275 pa
->max_length
=PS_MALLOC
-MALLOC_OVERHEAD
;
278 length
=(uint
) strlen(name
)+1;
279 if (pa
->length
+length
>= pa
->max_length
)
281 pa
->max_length
=(pa
->length
+length
+MALLOC_OVERHEAD
+PS_MALLOC
-1)/PS_MALLOC
;
282 pa
->max_length
=pa
->max_length
*PS_MALLOC
-MALLOC_OVERHEAD
;
283 if (!(new_pos
= (uchar
*) my_realloc((uchar
*) pa
->str
,
284 (uint
) pa
->max_length
,
287 if (new_pos
!= pa
->str
)
289 my_ptrdiff_t diff
=PTR_BYTE_DIFF(new_pos
,pa
->str
);
290 for (i
=0 ; i
< pa
->typelib
.count
; i
++)
291 pa
->typelib
.type_names
[i
]= ADD_TO_PTR(pa
->typelib
.type_names
[i
],diff
,
296 if (pa
->typelib
.count
>= pa
->max_count
-1)
300 len
=(PC_MALLOC
*pa
->array_allocs
- MALLOC_OVERHEAD
);
301 if (!(new_array
=(const char **) my_realloc((uchar
*) pa
->typelib
.type_names
,
303 (sizeof(uchar
*)+sizeof(*pa
->flag
))*
304 (sizeof(uchar
*)+sizeof(*pa
->flag
)),
307 pa
->typelib
.type_names
=new_array
;
308 old_count
=pa
->max_count
;
309 pa
->max_count
=len
/(sizeof(uchar
*) + sizeof(*pa
->flag
));
310 pa
->flag
= (int7
*) (pa
->typelib
.type_names
+pa
->max_count
);
311 memcpy((uchar
*) pa
->flag
,(char *) (pa
->typelib
.type_names
+old_count
),
312 old_count
*sizeof(*pa
->flag
));
314 pa
->flag
[pa
->typelib
.count
]=0; /* Reset flag */
315 pa
->typelib
.type_names
[pa
->typelib
.count
++]= (char*) (pa
->str
+pa
->length
);
316 pa
->typelib
.type_names
[pa
->typelib
.count
]= NullS
; /* Put end-mark */
317 VOID(strmov((char*) pa
->str
+ pa
->length
, name
));
320 } /* insert_pointer_name */
323 /* free pointer array */
/*
  Release the memory held by a POINTER_ARRAY.

  Nothing is freed unless pa->typelib.count is non-zero, i.e. unless at
  least one string has been inserted. The type_names pointer is cleared
  after it has been freed.
*/
325 static void free_pointer_array(reg1 POINTER_ARRAY
*pa
)
327 if (pa
->typelib
.count
)
330 my_free((uchar
*) pa
->typelib
.type_names
,MYF(0));
331 pa
->typelib
.type_names
=0;
332 my_free((uchar
*) pa
->str
,MYF(0));
335 } /* free_pointer_array */
338 /* Code for replace routines */
340 #define SET_MALLOC_HUNC 64
/*
  One set handled by init_replace() while the replace table is built:
  a bit array of positions plus the transitions out of the set.
*/
342 typedef struct st_rep_set
{
343 uint
*bits
; /* Pointer to used sets; accessed as a bit array of WORD_BIT-wide words */
344 short next
[LAST_CHAR_CODE
]; /* Pointer to next sets, indexed by character code */
345 uint found_len
; /* Best match to date */
348 uint size_of_bits
; /* For convenience; copied from REP_SETS.size_of_bits */
/*
  Container for all REP_SET objects created through make_new_set().
*/
351 typedef struct st_rep_sets
{
352 uint count
; /* Number of sets */
353 uint extra
; /* Extra sets in buffer */
354 uint invisible
; /* Sets not shown */
356 REP_SET
*set
,*set_buffer
; /* set_buffer is the allocation; set points past the invisible sets in it */
360 typedef struct st_found_set
{
365 typedef struct st_follow
{
372 static int init_sets(REP_SETS
*sets
,uint states
);
373 static REP_SET
*make_new_set(REP_SETS
*sets
);
374 static void make_sets_invisible(REP_SETS
*sets
);
375 static void free_last_set(REP_SETS
*sets
);
376 static void free_sets(REP_SETS
*sets
);
377 static void internal_set_bit(REP_SET
*set
, uint bit
);
378 static void internal_clear_bit(REP_SET
*set
, uint bit
);
379 static void or_bits(REP_SET
*to
,REP_SET
*from
);
380 static void copy_bits(REP_SET
*to
,REP_SET
*from
);
381 static int cmp_bits(REP_SET
*set1
,REP_SET
*set2
);
382 static int get_next_bit(REP_SET
*set
,uint lastpos
);
383 static short find_set(REP_SETS
*sets
,REP_SET
*find
);
384 static short find_found(FOUND_SET
*found_set
,uint table_offset
,
386 static uint
start_at_word(char * pos
);
387 static uint
end_of_word(char * pos
);
388 static uint
replace_len(char * pos
);
390 static uint found_sets
=0;
393 /* Init a replace structure for further calls */
395 static REPLACE
*init_replace(char * *from
, char * *to
,uint count
,
396 char * word_end_chars
)
398 uint i
,j
,states
,set_nr
,len
,result_len
,max_length
,found_end
,bits_set
,bit_nr
;
401 char used_chars
[LAST_CHAR_CODE
],is_word_end
[256];
402 char * pos
, *to_pos
, **to_array
;
404 REP_SET
*set
,*start_states
,*word_states
,*new_set
;
405 FOLLOWS
*follow
,*follow_ptr
;
407 FOUND_SET
*found_set
;
408 REPLACE_STRING
*rep_str
;
409 DBUG_ENTER("init_replace");
411 /* Count number of states */
412 for (i
=result_len
=max_length
=0 , states
=2 ; i
< count
; i
++)
414 len
=replace_len(from
[i
]);
418 my_message(0,"No to-string for last from-string",MYF(ME_BELL
));
422 result_len
+=(uint
) strlen(to
[i
])+1;
423 if (len
> max_length
)
426 bzero((char*) is_word_end
,sizeof(is_word_end
));
427 for (i
=0 ; word_end_chars
[i
] ; i
++)
428 is_word_end
[(uchar
) word_end_chars
[i
]]=1;
430 if (init_sets(&sets
,states
))
433 if (!(found_set
= (FOUND_SET
*) my_malloc(sizeof(FOUND_SET
)*max_length
*count
,
439 VOID(make_new_set(&sets
)); /* Set starting set */
440 make_sets_invisible(&sets
); /* Hide previus sets */
442 word_states
=make_new_set(&sets
); /* Start of new word */
443 start_states
=make_new_set(&sets
); /* This is first state */
444 if (!(follow
=(FOLLOWS
*) my_malloc((states
+2)*sizeof(FOLLOWS
),MYF(MY_WME
))))
447 my_free((uchar
*) found_set
,MYF(0));
451 /* Init follow_ptr[] */
452 for (i
=0, states
=1, follow_ptr
=follow
+1 ; i
< count
; i
++)
454 if (from
[i
][0] == '\\' && from
[i
][1] == '^')
456 internal_set_bit(start_states
,states
+1);
459 start_states
->table_offset
=i
;
460 start_states
->found_offset
=1;
463 else if (from
[i
][0] == '\\' && from
[i
][1] == '$')
465 internal_set_bit(start_states
,states
);
466 internal_set_bit(word_states
,states
);
467 if (!from
[i
][2] && start_states
->table_offset
== (uint
) ~0)
469 start_states
->table_offset
=i
;
470 start_states
->found_offset
=0;
475 internal_set_bit(word_states
,states
);
476 if (from
[i
][0] == '\\' && (from
[i
][1] == 'b' && from
[i
][2]))
477 internal_set_bit(start_states
,states
+1);
479 internal_set_bit(start_states
,states
);
481 for (pos
=from
[i
], len
=0; *pos
; pos
++)
483 if (*pos
== '\\' && *(pos
+1))
488 follow_ptr
->chr
= SPACE_CHAR
;
491 follow_ptr
->chr
= START_OF_LINE
;
494 follow_ptr
->chr
= END_OF_LINE
;
497 follow_ptr
->chr
= '\r';
500 follow_ptr
->chr
= '\t';
503 follow_ptr
->chr
= '\v';
506 follow_ptr
->chr
= (uchar
) *pos
;
511 follow_ptr
->chr
= (uchar
) *pos
;
512 follow_ptr
->table_offset
=i
;
513 follow_ptr
->len
= ++len
;
517 follow_ptr
->table_offset
=i
;
520 states
+=(uint
) len
+1;
524 for (set_nr
=0,pos
=0 ; set_nr
< sets
.count
; set_nr
++)
527 default_state
= 0; /* Start from beginning */
529 /* If end of found-string not found or start-set with current set */
531 for (i
= (uint
) ~0; (i
=get_next_bit(set
,i
)) ;)
536 default_state
= find_found(found_set
,set
->table_offset
,
537 set
->found_offset
+1);
540 copy_bits(sets
.set
+used_sets
,set
); /* Save set for changes */
542 or_bits(sets
.set
+used_sets
,sets
.set
); /* Can restart from start */
544 /* Find all chars that follows current sets */
545 bzero((char*) used_chars
,sizeof(used_chars
));
546 for (i
= (uint
) ~0; (i
=get_next_bit(sets
.set
+used_sets
,i
)) ;)
548 used_chars
[follow
[i
].chr
]=1;
549 if ((follow
[i
].chr
== SPACE_CHAR
&& !follow
[i
+1].chr
&&
550 follow
[i
].len
> 1) || follow
[i
].chr
== END_OF_LINE
)
554 /* Mark word_chars used if \b is in state */
555 if (used_chars
[SPACE_CHAR
])
556 for (pos
= word_end_chars
; *pos
; pos
++)
557 used_chars
[(int) (uchar
) *pos
] = 1;
559 /* Handle other used characters */
560 for (chr
= 0 ; chr
< 256 ; chr
++)
562 if (! used_chars
[chr
])
563 set
->next
[chr
]= (short) (chr
? default_state
: -1);
566 new_set
=make_new_set(&sets
);
567 set
=sets
.set
+set_nr
; /* if realloc */
568 new_set
->table_offset
=set
->table_offset
;
569 new_set
->found_len
=set
->found_len
;
570 new_set
->found_offset
=set
->found_offset
+1;
573 for (i
= (uint
) ~0 ; (i
=get_next_bit(sets
.set
+used_sets
,i
)) ; )
575 if (!follow
[i
].chr
|| follow
[i
].chr
== chr
||
576 (follow
[i
].chr
== SPACE_CHAR
&&
578 (!chr
&& follow
[i
].len
> 1 && ! follow
[i
+1].chr
))) ||
579 (follow
[i
].chr
== END_OF_LINE
&& ! chr
))
581 if ((! chr
|| (follow
[i
].chr
&& !follow
[i
+1].chr
)) &&
582 follow
[i
].len
> found_end
)
583 found_end
=follow
[i
].len
;
584 if (chr
&& follow
[i
].chr
)
585 internal_set_bit(new_set
,i
+1); /* To next set */
587 internal_set_bit(new_set
,i
);
592 new_set
->found_len
=0; /* Set for testing if first */
594 for (i
= (uint
) ~0; (i
=get_next_bit(new_set
,i
)) ;)
596 if ((follow
[i
].chr
== SPACE_CHAR
||
597 follow
[i
].chr
== END_OF_LINE
) && ! chr
)
601 if (follow
[bit_nr
-1].len
< found_end
||
602 (new_set
->found_len
&&
603 (chr
== 0 || !follow
[bit_nr
].chr
)))
604 internal_clear_bit(new_set
,i
);
607 if (chr
== 0 || !follow
[bit_nr
].chr
)
609 new_set
->table_offset
=follow
[bit_nr
].table_offset
;
610 if (chr
|| (follow
[i
].chr
== SPACE_CHAR
||
611 follow
[i
].chr
== END_OF_LINE
))
612 new_set
->found_offset
=found_end
; /* New match */
613 new_set
->found_len
=found_end
;
620 set
->next
[chr
] = find_found(found_set
,
621 new_set
->table_offset
,
622 new_set
->found_offset
);
623 free_last_set(&sets
);
626 set
->next
[chr
] = find_set(&sets
,new_set
);
629 set
->next
[chr
] = find_set(&sets
,new_set
);
634 /* Alloc replace structure for the replace-state-machine */
636 if ((replace
=(REPLACE
*) my_malloc(sizeof(REPLACE
)*(sets
.count
)+
637 sizeof(REPLACE_STRING
)*(found_sets
+1)+
638 sizeof(char *)*count
+result_len
,
639 MYF(MY_WME
| MY_ZEROFILL
))))
641 rep_str
=(REPLACE_STRING
*) (replace
+sets
.count
);
642 to_array
=(char **) (rep_str
+found_sets
+1);
643 to_pos
=(char *) (to_array
+count
);
644 for (i
=0 ; i
< count
; i
++)
647 to_pos
=strmov(to_pos
,to
[i
])+1;
650 rep_str
[0].replace_string
=0;
651 for (i
=1 ; i
<= found_sets
; i
++)
653 pos
=from
[found_set
[i
-1].table_offset
];
654 rep_str
[i
].found
= (my_bool
) (!memcmp(pos
,"\\^",3) ? 2 : 1);
655 rep_str
[i
].replace_string
=to_array
[found_set
[i
-1].table_offset
];
656 rep_str
[i
].to_offset
=found_set
[i
-1].found_offset
-start_at_word(pos
);
657 rep_str
[i
].from_offset
=found_set
[i
-1].found_offset
-replace_len(pos
)+
660 for (i
=0 ; i
< sets
.count
; i
++)
662 for (j
=0 ; j
< 256 ; j
++)
663 if (sets
.set
[i
].next
[j
] >= 0)
664 replace
[i
].next
[j
]=replace
+sets
.set
[i
].next
[j
];
666 replace
[i
].next
[j
]=(REPLACE
*) (rep_str
+(-sets
.set
[i
].next
[j
]-1));
669 my_free((uchar
*) follow
,MYF(0));
671 my_free((uchar
*) found_set
,MYF(0));
672 DBUG_PRINT("exit",("Replace table has %d states",sets
.count
));
673 DBUG_RETURN(replace
);
677 static int init_sets(REP_SETS
*sets
,uint states
)
679 bzero((char*) sets
,sizeof(*sets
));
680 sets
->size_of_bits
=((states
+7)/8);
681 if (!(sets
->set_buffer
=(REP_SET
*) my_malloc(sizeof(REP_SET
)*SET_MALLOC_HUNC
,
684 if (!(sets
->bit_buffer
=(uint
*) my_malloc(sizeof(uint
)*sets
->size_of_bits
*
685 SET_MALLOC_HUNC
,MYF(MY_WME
))))
687 my_free((uchar
*) sets
->set
,MYF(0));
693 /* Make help sets invisible for nicer coding */
/*
  Hide every set created so far: remember how many there are in
  sets->invisible and advance sets->set past them, so that indexing
  through sets->set no longer reaches the hidden sets.
*/
695 static void make_sets_invisible(REP_SETS
*sets
)
697 sets
->invisible
=sets
->count
;
698 sets
->set
+=sets
->count
;
702 static REP_SET
*make_new_set(REP_SETS
*sets
)
704 uint i
,count
,*bit_buffer
;
709 set
=sets
->set
+ sets
->count
++;
710 bzero((char*) set
->bits
,sizeof(uint
)*sets
->size_of_bits
);
711 bzero((char*) &set
->next
[0],sizeof(set
->next
[0])*LAST_CHAR_CODE
);
714 set
->table_offset
= (uint
) ~0;
715 set
->size_of_bits
=sets
->size_of_bits
;
718 count
=sets
->count
+sets
->invisible
+SET_MALLOC_HUNC
;
719 if (!(set
=(REP_SET
*) my_realloc((uchar
*) sets
->set_buffer
,
720 sizeof(REP_SET
)*count
,
723 sets
->set_buffer
=set
;
724 sets
->set
=set
+sets
->invisible
;
725 if (!(bit_buffer
=(uint
*) my_realloc((uchar
*) sets
->bit_buffer
,
726 (sizeof(uint
)*sets
->size_of_bits
)*count
,
729 sets
->bit_buffer
=bit_buffer
;
730 for (i
=0 ; i
< count
; i
++)
732 sets
->set_buffer
[i
].bits
=bit_buffer
;
733 bit_buffer
+=sets
->size_of_bits
;
735 sets
->extra
=SET_MALLOC_HUNC
;
736 return make_new_set(sets
);
739 static void free_last_set(REP_SETS
*sets
)
/*
  Free the two buffers owned by a REP_SETS: the array of REP_SET
  structures (set_buffer) and the bit storage they point into (bit_buffer).
*/
746 static void free_sets(REP_SETS
*sets
)
748 my_free((uchar
*)sets
->set_buffer
,MYF(0));
749 my_free((uchar
*)sets
->bit_buffer
,MYF(0));
/*
  Turn on bit number 'bit' in set->bits. The bit array is stored as uint
  words of WORD_BIT bits each: bit / WORD_BIT selects the word and
  bit % WORD_BIT the position inside it.

  NOTE(review): the mask is built from the signed int constant 1; when
  bit % WORD_BIT selects the top bit this shifts into the sign bit.
  Consider an unsigned constant - confirm before changing.
*/
753 static void internal_set_bit(REP_SET
*set
, uint bit
)
755 set
->bits
[bit
/ WORD_BIT
] |= 1 << (bit
% WORD_BIT
);
/*
  Turn off bit number 'bit' in set->bits. Word and position are selected
  the same way as in internal_set_bit(): bit / WORD_BIT is the word index
  and bit % WORD_BIT the position inside that word.

  NOTE(review): the mask is built from the signed int constant 1; when
  bit % WORD_BIT selects the top bit this shifts into the sign bit.
  Consider an unsigned constant - confirm before changing.
*/
759 static void internal_clear_bit(REP_SET
*set
, uint bit
)
761 set
->bits
[bit
/ WORD_BIT
] &= ~ (1 << (bit
% WORD_BIT
));
/*
  OR every word of from->bits into to->bits. The number of words
  processed is to->size_of_bits, so 'from' must have at least that many.
*/
766 static void or_bits(REP_SET
*to
,REP_SET
*from
)
769 for (i
=0 ; i
< to
->size_of_bits
; i
++)
770 to
->bits
[i
]|=from
->bits
[i
];
/*
  Overwrite to->bits with the contents of from->bits.
  Copies sizeof(uint) * to->size_of_bits bytes.
*/
774 static void copy_bits(REP_SET
*to
,REP_SET
*from
)
776 memcpy((uchar
*) to
->bits
,(uchar
*) from
->bits
,
777 (size_t) (sizeof(uint
) * to
->size_of_bits
));
/*
  Compare the bit arrays of two sets with memcmp() over
  sizeof(uint) * set1->size_of_bits bytes.
  Returns 0 when the arrays are identical, non-zero otherwise.
*/
780 static int cmp_bits(REP_SET
*set1
,REP_SET
*set2
)
782 return memcmp(set1
->bits
, set2
->bits
,
783 sizeof(uint
) * set1
->size_of_bits
);
787 /* Get next set bit from set. */
789 static int get_next_bit(REP_SET
*set
,uint lastpos
)
791 uint pos
,*start
,*end
,bits
;
793 start
=set
->bits
+ ((lastpos
+1) / WORD_BIT
);
794 end
=set
->bits
+ set
->size_of_bits
;
795 bits
=start
[0] & ~((1 << ((lastpos
+1) % WORD_BIT
)) -1);
797 while (! bits
&& ++start
< end
)
801 pos
=(uint
) (start
-set
->bits
)*WORD_BIT
;
810 /* Find if there is an identical set in sets. If there is, use it and
811 free the given set; otherwise put the given set in sets and return its
814 static short find_set(REP_SETS
*sets
,REP_SET
*find
)
817 for (i
=0 ; i
< sets
->count
-1 ; i
++)
819 if (!cmp_bits(sets
->set
+i
,find
))
825 return (short) i
; /* return new position */
830 find if there is a found_set with same table_offset & found_offset
831 If there is return offset to it, else add new offset and return pos.
832 Pos returned is -offset-2 in found_set_structure because it is
833 saved in set->next and set->next[] >= 0 points to next set and
834 set->next[] == -1 is reserved for end without replaces.
837 static short find_found(FOUND_SET
*found_set
,uint table_offset
,
841 for (i
=0 ; (uint
) i
< found_sets
; i
++)
842 if (found_set
[i
].table_offset
== table_offset
&&
843 found_set
[i
].found_offset
== found_offset
)
844 return (short) (-i
-2);
845 found_set
[i
].table_offset
=table_offset
;
846 found_set
[i
].found_offset
=found_offset
;
848 return (short) (-i
-2); /* return new position */
851 /* Return 1 if regexp starts with \b or ends with \b*/
/*
  Return 1 if the from-string 'pos' begins with the two characters \b
  and has at least one more character after them, or begins with the
  two characters \^; return 0 otherwise.
*/
853 static uint
start_at_word(char * pos
)
855 return (((!memcmp(pos
,"\\b",2) && pos
[2]) || !memcmp(pos
,"\\^",2)) ? 1 : 0);
858 static uint
end_of_word(char * pos
)
860 char * end
=strend(pos
);
861 return ((end
> pos
+2 && !memcmp(end
-2,"\\b",2)) ||
862 (end
>= pos
+2 && !memcmp(end
-2,"\\$",2))) ?
867 static uint
replace_len(char * str
)
872 if (str
[0] == '\\' && str
[1])
881 /* The actual loop */
883 static uint
replace_strings(REPLACE
*rep
, char **start
, uint
*max_length
,
886 reg1 REPLACE
*rep_pos
;
887 reg2 REPLACE_STRING
*rep_str
;
888 char *to
, *end
, *pos
, *new;
890 end
=(to
= *start
) + *max_length
-1;
894 while (!rep_pos
->found
)
896 rep_pos
= rep_pos
->next
[(uchar
) *from
];
900 if (!(new=my_realloc(*start
,*max_length
,MYF(MY_WME
))))
902 to
=new+(to
- *start
);
903 end
=(*start
=new)+ *max_length
-1;
907 if (!(rep_str
= ((REPLACE_STRING
*) rep_pos
))->replace_string
)
908 return (uint
) (to
- *start
)-1;
909 updated
=1; /* Some char * is replaced */
910 to
-=rep_str
->to_offset
;
911 for (pos
=rep_str
->replace_string
; *pos
; pos
++)
916 if (!(new=my_realloc(*start
,*max_length
,MYF(MY_WME
))))
918 to
=new+(to
- *start
);
919 end
=(*start
=new)+ *max_length
-1;
923 if (!*(from
-=rep_str
->from_offset
) && rep_pos
->found
!= 2)
924 return (uint
) (to
- *start
);
929 static char *buffer
; /* The buffer itself, grown as needed. */
930 static int bufbytes
; /* Number of bytes in the buffer. */
931 static int bufread
,my_eof
; /* Number of bytes to get with each read(). */
932 static uint bufalloc
;
933 static char *out_buff
;
934 static uint out_length
;
936 static int initialize_buffer()
939 bufalloc
= bufread
+ bufread
/ 2;
940 if (!(buffer
= my_malloc(bufalloc
+1,MYF(MY_WME
))))
944 if (!(out_buff
=my_malloc(out_length
,MYF(MY_WME
))))
949 static void reset_buffer()
954 static void free_buffer()
956 my_free(buffer
,MYF(MY_WME
));
957 my_free(out_buff
,MYF(MY_WME
));
962 Fill the buffer retaining the last n bytes at the beginning of the
963 newly filled buffer (for backward context). Returns the number of new
964 bytes read from disk.
967 static int fill_buffer_retaining(fd
,n
)
973 /* See if we need to grow the buffer. */
974 if ((int) bufalloc
- n
<= bufread
)
976 while ((int) bufalloc
- n
<= bufread
)
981 buffer
= my_realloc(buffer
, bufalloc
+1, MYF(MY_WME
));
986 /* Shift stuff down. */
987 bmove(buffer
,buffer
+bufbytes
-n
,(uint
) n
);
993 /* Read in new stuff. */
994 if ((i
=(int) my_read(fd
, (uchar
*) buffer
+ bufbytes
,
995 (size_t) bufread
, MYF(MY_WME
))) < 0)
998 /* Kludge to pretend every nonempty file ends with a newline. */
999 if (i
== 0 && bufbytes
> 0 && buffer
[bufbytes
- 1] != '\n')
1002 buffer
[bufbytes
] = '\n';
1009 /* Return 0 if convert is ok */
1010 /* Global variable updated is set if something was changed */
1012 static int convert_pipe(rep
,in
,out
)
1018 char save_char
,*end_of_line
,*start_of_line
;
1019 DBUG_ENTER("convert_pipe");
1024 while ((error
=fill_buffer_retaining(fileno(in
),retain
)) > 0)
1026 end_of_line
=buffer
;
1027 buffer
[bufbytes
]=0; /* Sentinel */
1030 start_of_line
=end_of_line
;
1031 while (end_of_line
[0] != '\n' && end_of_line
[0])
1033 if (end_of_line
== buffer
+bufbytes
)
1035 retain
= (int) (end_of_line
- start_of_line
);
1036 break; /* No end of line, read more */
1038 save_char
=end_of_line
[0];
1041 if ((length
=replace_strings(rep
,&out_buff
,&out_length
,start_of_line
)) ==
1045 out_buff
[length
++]=save_char
; /* Don't write added newline */
1046 if (my_fwrite(out
, (uchar
*) out_buff
, length
, MYF(MY_WME
| MY_NABP
)))
1054 static int convert_file(REPLACE
*rep
, char * name
)
1058 char dir_buff
[FN_REFLEN
], tempname
[FN_REFLEN
], *org_name
= name
;
1059 #ifdef HAVE_READLINK
1060 char link_name
[FN_REFLEN
];
1063 size_t dir_buff_length
;
1064 DBUG_ENTER("convert_file");
1066 /* check if name is a symlink */
1067 #ifdef HAVE_READLINK
1068 org_name
= (!my_disable_symlinks
&&
1069 !my_readlink(link_name
, name
, MYF(0))) ? link_name
: name
;
1071 if (!(in
= my_fopen(org_name
,O_RDONLY
,MYF(MY_WME
))))
1073 dirname_part(dir_buff
, org_name
, &dir_buff_length
);
1074 if ((temp_file
= create_temp_file(tempname
, dir_buff
, "PR", O_WRONLY
,
1077 my_fclose(in
,MYF(0));
1080 if (!(out
= my_fdopen(temp_file
, tempname
, O_WRONLY
, MYF(MY_WME
))))
1082 my_fclose(in
,MYF(0));
1086 error
=convert_pipe(rep
,in
,out
);
1087 my_fclose(in
,MYF(0)); my_fclose(out
,MYF(0));
1089 if (updated
&& ! error
)
1090 my_redel(org_name
,tempname
,MYF(MY_WME
| MY_LINK_WARNING
));
1092 my_delete(tempname
,MYF(MY_WME
));
1093 if (!silent
&& ! error
)
1096 printf("%s converted\n",name
);
1098 printf("%s left unchanged\n",name
);