patch #7303
[mldonkey.git] / src / utils / cdk / string2.ml
blobc0487cc904a97f18e32453db3284dd1481545171
(* Copyright 2001, 2002 b8_bavard, b8_fee_carabine, INRIA *)
(*
    This file is part of mldonkey.

    mldonkey is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    mldonkey is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with mldonkey; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*)
(* [string_ncmp s1 s2 n] tells whether [s1] and [s2] agree on their first
   [n] characters.  When either string is shorter than [n], the two strings
   are compared in full instead.  Returns [true] on equality. *)
let string_ncmp s1 s2 n =
  let long_enough s = String.length s >= n in
  if long_enough s1 && long_enough s2 then
    String.sub s1 0 n = String.sub s2 0 n
  else
    s1 = s2
(* [search_from doc deb str] returns the index of the first occurrence of
   [str] in [doc] located at or after position [deb].
   Raises [Not_found] when there is no such occurrence. *)
let search_from doc deb str =
  let len = String.length str in
  (* Highest index at which [str] can still fit inside [doc]. *)
  let last = String.length doc - len in
  let matches_at i =
    let rec same j = j = len || (doc.[i + j] = str.[j] && same (j + 1)) in
    (* A negative [i] makes the character access raise [Invalid_argument];
       count that as a mismatch so the scan simply moves on. *)
    try same 0 with Invalid_argument _ -> false
  in
  let rec scan i =
    (* make sure we're not too far away *)
    if i > last then raise Not_found
    else if matches_at i then i
    else scan (i + 1)
  in
  scan deb
(* [replace doc chr str] builds a copy of [doc] in which every occurrence
   of the character [chr] is substituted by the string [str]. *)
let replace doc chr str =
  let out = Buffer.create (2 * String.length doc) in
  let emit c =
    if c = chr then Buffer.add_string out str else Buffer.add_char out c
  in
  String.iter emit doc;
  Buffer.contents out
(* [split s c] cuts [s] at every occurrence of the separator [c] and returns
   the pieces in order.  Empty pieces are kept, so the result always has one
   more element than [s] has separators; in particular [split "" c] is
   [[""]]. *)
let split s c =
  let len = String.length s in
  (* [acc] holds the pieces found so far, most recent first. *)
  let rec iter pos acc =
    if pos = len then List.rev ("" :: acc)
    else
      (* [len] stands for "no further separator": a real index is < len. *)
      let next = try String.index_from s pos c with Not_found -> len in
      let acc = String.sub s pos (next - pos) :: acc in
      if next = len then List.rev acc else iter (next + 1) acc
  in
  iter 0 []
(* [splitn s c n] behaves like a split of [s] on the separator [c] that stops
   after [n] cuts: the last piece is whatever remains of [s], separators
   included.  With [n = 0] the result is [[s]]; a negative [n] never reaches
   zero, so the number of cuts is then unlimited. *)
let splitn s c n =
  let len = String.length s in
  (* [acc] holds the pieces found so far, most recent first. *)
  let rec iter pos n acc =
    (* [len] stands for "take the rest": a real separator index is < len. *)
    let next =
      if n = 0 || pos = len then len
      else try String.index_from s pos c with Not_found -> len
    in
    let acc = String.sub s pos (next - pos) :: acc in
    if next = len then List.rev acc else iter (next + 1) (n - 1) acc
  in
  iter 0 n []
(* [remove_empty list list2] pushes every non-empty string of [list] onto the
   accumulator [list2] (which is in reverse order) and returns the reversed
   accumulator. *)
let remove_empty list list2 =
  let keep acc s = if s = "" then acc else s :: acc in
  List.rev (List.fold_left keep list2 list)
(* [split_simplify s c] splits [s] on the separator [c] with [split] and
   drops the empty pieces with [remove_empty]. *)
let split_simplify s c =
  remove_empty (split s c) []
(* [unsplit l c] joins the strings of [l] into one string, inserting the
   character [c] between consecutive elements.  The empty list gives [""]. *)
let unsplit l c =
  String.concat (String.make 1 c) l
(* [words s] returns the maximal runs of non-blank characters of [s], in
   order.  Blanks are space, tab ('\009'), line feed ('\010') and carriage
   return ('\013').  A string with no such run gives the empty list. *)
let words s =
  let len = String.length s in
  let is_blank = function
    | ' ' | '\009' | '\010' | '\013' -> true
    | _ -> false
  in
  (* [iter_out]: scanning between words.  [acc] is the list of words found
     so far, most recent first. *)
  let rec iter_out pos acc =
    if pos = len then List.rev acc
    else if is_blank s.[pos] then iter_out (pos + 1) acc
    else iter_in pos (pos + 1) acc
  (* [iter_in]: scanning inside a word that started at [pos0]. *)
  and iter_in pos0 pos acc =
    if pos = len then List.rev (String.sub s pos0 (len - pos0) :: acc)
    else if is_blank s.[pos] then
      iter_out (pos + 1) (String.sub s pos0 (pos - pos0) :: acc)
    else iter_in pos0 (pos + 1) acc
  in
  iter_out 0 []
(* [convert init f s] walks the characters of [s] from left to right.  For
   each character [c] it calls [f buf state c], where [buf] is the output
   buffer and [state] starts at [init]; the value returned by [f] becomes
   the next state.  The result is the final contents of the buffer. *)
let convert init f s =
  let out = Buffer.create (String.length s) in
  let state = ref init in
  String.iter (fun c -> state := f out !state c) s;
  Buffer.contents out
(* [before s pos] is the prefix of [s] made of its first [pos] characters. *)
let before s pos =
  let start = 0 in
  String.sub s start pos
(* [after s pos] is the suffix of [s] that starts at index [pos]. *)
let after s pos =
  String.sub s pos (String.length s - pos)
(* [cut_at s c] cuts [s] at the first occurrence of [c] and returns the text
   before it and the text after it; the separator itself is dropped.
   When [c] does not occur in [s] the result is [(s, "")]. *)
let cut_at s c =
  try
    let pos = String.index s c in
    (String.sub s 0 pos, String.sub s (pos + 1) (String.length s - pos - 1))
  with Not_found -> (s, "")
(* [check_prefix s prefix] is [true] exactly when [s] begins with
   [prefix]. *)
let check_prefix s prefix =
  let plen = String.length prefix in
  if String.length s < plen then false
  else String.sub s 0 plen = prefix
(* [check_suffix s suffix] is [true] exactly when [s] ends with [suffix]. *)
let check_suffix s suffix =
  let slen = String.length suffix in
  let start = String.length s - slen in
  if start < 0 then false
  else String.sub s start slen = suffix
(* [upp_initial s] returns [s] with its first character passed through
   [Char.uppercase]; the argument itself is left untouched.  The empty
   string is returned as is. *)
let upp_initial s =
  let len = String.length s in
  if len = 0 then s
  else String.make 1 (Char.uppercase s.[0]) ^ String.sub s 1 (len - 1)
(* [subequal s1 pos1 s2 pos2 len] tells whether the [len] characters of [s1]
   starting at [pos1] are the same as the [len] characters of [s2] starting
   at [pos2].  It is [false] when either range runs past the end of its
   string.  Not optimal: characters are compared one at a time. *)
let subequal s1 pos1 s2 pos2 len =
  let fits s pos = pos + len <= String.length s in
  let rec iter pos =
    pos = len || (s1.[pos1 + pos] = s2.[pos2 + pos] && iter (pos + 1))
  in
  fits s1 pos1 && fits s2 pos2 && iter 0
(* [subcontains s sub] tells whether [sub] occurs somewhere inside [s].
   The empty string occurs in every string.  Not optimal: every start
   position is tried in turn. *)
let subcontains s sub =
  let slen = String.length sub in
  (* Last start position at which [sub] still fits; negative when [sub] is
     longer than [s], in which case the scan below stops at once. *)
  let last = String.length s - slen in
  let rec matches_at pos j =
    j = slen || (s.[pos + j] = sub.[j] && matches_at pos (j + 1))
  in
  let rec after_pos pos =
    pos <= last && (matches_at pos 0 || after_pos (pos + 1))
  in
  after_pos 0
(* [of_char c] is the one-character string made of [c]. *)
let of_char c =
  let count = 1 in
  String.make count c
(* [resize s newlen] returns a string of length [newlen] built from [s]:
   [s] is truncated when it is longer than [newlen], otherwise it is
   extended on the right.  The extension is filled with NUL characters so
   that the result is fully determined.  [s] itself is not modified. *)
let resize s newlen =
  let len = String.length s in
  if len > newlen then String.sub s 0 newlen
  else s ^ String.make (newlen - len) '\000'
(* [init len f] builds the string of length [len] whose character at index
   [i] is [f i].  [f] is called on 0, 1, ..., [len - 1] in that order.
   Raises [Invalid_argument] when [len] is negative or larger than
   [Sys.max_string_length]. *)
let init len f =
  if len < 0 || len > Sys.max_string_length then invalid_arg "String2.init";
  let b = Buffer.create len in
  for i = 0 to len - 1 do
    Buffer.add_char b (f i)
  done;
  Buffer.contents b
(* [is_space c] is [true] for space, line feed, carriage return and tab. *)
let is_space c =
  match c with
  | ' ' | '\n' | '\r' | '\t' -> true
  | _ -> false
(* [tokens s] cuts [s] into tokens separated by characters for which
   [is_space] holds.  A token that starts with a single or double quote runs
   up to the matching closing quote and may contain spaces; the quotes are
   not part of the token.  Inside a quoted token a backslash makes the next
   character be skipped when looking for the closing quote, but both
   characters stay in the token text.
   Raises [Failure] when a quoted token is not closed. *)
let tokens s =
  let len = String.length s in
  (* Inside an unquoted token that began at [start_pos]. *)
  let rec iter_space start_pos i =
    if i = len then
      [String.sub s start_pos (i - start_pos)]
    else if is_space s.[i] then
      String.sub s start_pos (i - start_pos) :: iter_next (i + 1)
    else
      iter_space start_pos (i + 1)

  (* Between tokens: skip separators and find out what kind of token
     comes next. *)
  and iter_next i =
    if i = len then []
    else
      let c = s.[i] in
      if is_space c then
        iter_next (i + 1)
      else if c = '\'' || c = '"' then
        iter_delim (i + 1) (i + 1) c
      else
        iter_space i (i + 1)

  (* Inside a quoted token that began at [start_pos] and is closed by
     [delim]. *)
  and iter_delim start_pos i delim =
    if i = len then
      failwith (Printf.sprintf "Unterminated delim %c" delim)
    else
      let c = s.[i] in
      if c = delim then
        String.sub s start_pos (i - start_pos) :: iter_next (i + 1)
      else if c = '\\' && i + 1 < len then
        (* Step over the escaped character so that it cannot close the
           token. *)
        iter_delim start_pos (i + 2) delim
      else
        iter_delim start_pos (i + 1) delim
  in
  iter_next 0
(* Binding to the external primitive "ml_strstr", which is implemented
   outside this file; it takes two strings and returns a boolean.
   NOTE(review): the names suggest a substring test in the manner of C
   strstr -- confirm the semantics and the argument order against the
   stub. *)
external contains : string -> string -> bool = "ml_strstr"
(* [strneql s1 s2 len] is [true] when [s1] and [s2] hold the same characters
   at indices 0 to [len - 1].  The comparison runs from index [len - 1]
   down to 0 and stops at the first difference.  Nothing checks that [len]
   fits inside both strings, so an out-of-range index raises
   [Invalid_argument]. *)
let strneql s1 s2 len =
  let rec same_down i =
    i = -1 || (s1.[i] = s2.[i] && same_down (i - 1))
  in
  same_down (len - 1)
(* [starts_with s1 s2] is [true] when [s2] is a prefix of [s1]. *)
let starts_with s1 s2 =
  let len2 = String.length s2 in
  if len2 > String.length s1 then false
  else String.sub s1 0 len2 = s2
(* [replace_char s c1 c2] overwrites, in place, every occurrence of the
   character [c1] in [s] with [c2].  Nothing is returned: the argument
   string itself is modified. *)
let replace_char s c1 c2 =
  for i = 0 to String.length s - 1 do
    if s.[i] = c1 then s.[i] <- c2
  done
(* [stem s] lowercases a copy of [s], turns every character other than
   'a'..'z' and '0'..'9' into a space, and returns the non-empty pieces
   obtained by splitting the result on spaces with [split_simplify]. *)
let stem s =
  let lowered = String.lowercase (String.copy s) in
  let blank_unless_alnum i =
    match lowered.[i] with
    | 'a'..'z' | '0'..'9' -> ()
    | _ -> lowered.[i] <- ' '
  in
  for i = 0 to String.length lowered - 1 do
    blank_unless_alnum i
  done;
  split_simplify lowered ' '
(* [map f s] returns the array whose element at index [i] is [f s.[i]].
   [f] is applied to the characters from left to right.  The empty string
   gives the empty array. *)
let map f s =
  Array.init (String.length s) (fun i -> f s.[i])
(* [iteri f s] calls [f i s.[i]] for every index [i] of [s], in increasing
   order. *)
let iteri f s =
  let len = String.length s in
  let rec go i =
    if i < len then begin
      f i s.[i];
      go (i + 1)
    end
  in
  go 0
(* [init n f] builds the string of length [n] whose character at index [i]
   is [f i].  [f] is called on 0, 1, ..., [n - 1] in that order.
   Raises [Invalid_argument] when [n] is negative or larger than
   [Sys.max_string_length]. *)
let init n f =
  if n < 0 || n > Sys.max_string_length then invalid_arg "String2.init";
  let b = Buffer.create n in
  for i = 0 to n - 1 do
    Buffer.add_char b (f i)
  done;
  Buffer.contents b
(* [exists p s] is [true] when [p] holds for at least one character of [s].
   Characters are tested from left to right and the scan stops at the first
   hit. *)
let exists p s =
  let l = String.length s in
  let rec aux i =
    if i >= l then false
    else if p s.[i] then true
    else aux (i + 1)
  in
  aux 0
(* [existsi p s] is [true] when [p i s.[i]] holds for at least one index [i]
   of [s].  Indices are tested in increasing order and the scan stops at the
   first hit. *)
let existsi p s =
  let l = String.length s in
  let rec aux i =
    if i >= l then false
    else if p i s.[i] then true
    else aux (i + 1)
  in
  aux 0
(* [for_all p s] is [true] when [p] holds for every character of [s]; it is
   [true] for the empty string.  Characters are tested from left to right
   and the scan stops at the first failure. *)
let for_all p s =
  let l = String.length s in
  let rec aux i =
    if i >= l then true
    else if p s.[i] then aux (i + 1)
    else false
  in
  aux 0
(* [hex_string_of_string s] renders every character of [s] as its code in
   two lowercase hexadecimal digits followed by one space. *)
let hex_string_of_string s =
  let buf = Buffer.create 100 in
  for i = 0 to String.length s - 1 do
    Buffer.add_string buf (Printf.sprintf "%02x " (Char.code s.[i]))
  done;
  Buffer.contents buf
(* Reverse application: [x |> f] is [f x]. *)
let ( |> ) arg fn =
  fn arg
(* [dehtmlize s] replaces every "<br>" of [s] (matched case-insensitively)
   with a newline, then deletes every remaining span that starts with '<'
   and ends at the next '>'.  The two regular expressions are compiled once,
   when this definition is evaluated, and shared by all calls. *)
let dehtmlize =
  let br_regexp = Str.regexp_case_fold "<br>" in
  let tag_regexp = Str.regexp "<[^>]*>" in
  fun s ->
    s
    |> Str.global_replace br_regexp "\n"
    |> Str.global_replace tag_regexp ""