drop md4 i?86 specific asm implementations
[mldonkey.git] / src / utils / lib / indexer2.ml
blob769b68be30f393c76e6db574e9b68263ea87d731
1 (* Copyright 2001, 2002 b8_bavard, b8_fee_carabine, INRIA *)
2 (*
3 This file is part of mldonkey.
5 mldonkey is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 mldonkey is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mldonkey; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 open Printf2
22 module Make(Doc : sig
23 type t
25 val num : t -> int
26 val filtered : t -> bool
27 val filter : t -> bool -> unit
28 end) = struct
31 type doc = Doc.t
33 type filtered_node =
34 None
35 | Some of node
36 | Filtered of node
38 and node = {
39 mutable next_doc : int;
40 mutable docs : Doc.t array;
41 mutable fields : int array;
42 mutable ndocs : int;
44 mutable nodes : filtered_node array;
47 type index = {
48 mutable node : node;
52 let stats index =
53 let mem = ref 2 in
54 let rec iter node =
55 mem := !mem + 9 +
56 Array.length node.docs + Array.length node.fields +
57 Array.length node.nodes;
58 Array.iter (fun n ->
59 match n with None -> ()
60 | Some node ->
61 mem := !mem + 2;
62 iter node
63 | Filtered node ->
64 mem := !mem + 2;
65 iter node
66 ) node.nodes
68 iter index.node;
69 !mem
71 let new_node () = {
72 next_doc = 0;
73 docs = [||];
74 fields = [||];
75 nodes = [||];
76 ndocs = 0;
79 let empty = new_node ()
81 let create () = { node = new_node () }
83 let convert_char c =
84 match c with
85 'a' .. 'z' -> int_of_char c - 97
86 | 'A' .. 'Z' -> int_of_char c - 65
87 | '0' .. '9' -> 26 + (int_of_char c - int_of_char '0')
88 | _ -> assert false
90 let exit_exn = Exit
92 let add_doc node doc fields =
93 (* lprintf "add_doc"; lprint_newline (); *)
94 let len = Array.length node.docs in
95 let pos = node.next_doc in
96 try
97 for i = 0 to node.next_doc - 1 do
98 if node.docs.(i) == doc then begin
99 node.fields.(i) <- node.fields.(i) lor fields;
100 raise exit_exn;
101 end;
102 done;
103 if pos = len then begin
104 let new_docs = Array.make (len + len/2 + 2) doc in
105 let new_fields = Array.make (len + len/2 + 2) 0 in
106 Array.blit node.docs 0 new_docs 0 len;
107 Array.blit node.fields 0 new_fields 0 len;
108 node.docs <- new_docs;
109 node.fields <- new_fields
110 end;
111 node.docs.(pos) <- doc;
112 (* lprintf "Adding doc with field %d" fields; lprint_newline (); *)
113 node.fields.(pos) <- fields;
114 node.next_doc <- pos +1;
115 node.ndocs <- node.ndocs + 1;
116 true
117 with e ->
118 (* lprintf "exn %s" (Printexc2.to_string e); lprint_newline (); *)
119 false
120 (* ; lprintf "done"; lprint_newline () *)
122 let add_char node c =
123 (* lprintf "add_char"; lprint_newline (); *)
124 let n = new_node () in
125 let len = Array.length node.nodes in
126 if len <= c then begin
127 let new_nodes = Array.make (c+1) None in
128 Array.blit node.nodes 0 new_nodes 0 len;
129 node.nodes <- new_nodes;
130 end;
131 (* lprintf "set %d %d %d" c len (Array.length node.nodes); lprint_newline (); *)
132 node.nodes.(c) <- Some n;
133 (* lprintf "done"; lprint_newline (); *)
136 let add index string doc fields =
137 (* lprintf "add (%s)" string; lprint_newline (); *)
139 (* lprintf "add"; lprint_newline (); *)
140 let len = String.length string in
141 let rec iter pos node =
142 (* lprintf "pos %d" pos; lprint_newline (); *)
143 if pos = len then
144 if add_doc node doc fields then begin
145 node.ndocs <- node.ndocs + 1;
146 true
147 end else false
148 else
149 let c = string.[pos] in
150 let c = convert_char c in
151 let node =
152 if Array.length node.nodes > c then
153 match node.nodes.(c) with
154 None -> add_char node c
155 | Some node -> node
156 | Filtered _ ->
157 Doc.filter doc true;
158 lprintf_nl "doc filtered";
159 raise Not_found
160 else
161 add_char node c
163 iter (pos+1) node
165 ignore (iter 0 index.node)
166 with e ->
167 (* lprintf "Exc %s" (Printexc2.to_string e);lprint_newline (); *)
169 (* ; lprintf "done"; lprint_newline () *)
171 let clear index = index.node <- new_node ()
173 let filter_node node bool =
174 (* lprintf "filter node"; lprint_newline (); *)
175 for i = 0 to node.next_doc - 1 do
176 (* lprintf "filter doc %s\n" (string_of_bool bool); *)
177 Doc.filter node.docs.(i) bool;
178 (* if Doc.filtered node.docs.(i) then
179 (lprintf "doc is filtered\n"); *)
180 done
182 let rec filter_nodes node bool =
183 (* lprintf "filter_nodes"; lprint_newline (); *)
184 filter_node node bool;
185 let len = Array.length node.nodes in
186 for i = 0 to len - 1 do
187 match node.nodes.(i) with
188 None -> ()
189 | Some n -> filter_nodes n bool
190 | Filtered n -> filter_nodes n bool
191 done
193 let add_filter index s =
195 let len = String.length s in
196 let rec iter pos node =
197 let c = s.[pos] in
198 let c = convert_char c in
199 let n =
200 if Array.length node.nodes > c then
201 match node.nodes.(c) with
202 None -> add_char node c
203 | Some node -> node
204 | Filtered _ -> raise Not_found
205 else
206 add_char node c
208 if pos+1 = len then begin
209 filter_nodes n true;
210 node.nodes.(c) <- Filtered n
211 end else
212 iter (pos+1) n
214 iter 0 index.node
215 with _ -> ()
218 let filter_words index list =
219 (* lprintf "FILTER ALL"; lprint_newline (); *)
220 List.iter (fun s ->
221 (* lprintf "filter (%s)" s; lprint_newline (); *)
222 add_filter index s) list
224 let clear_filter index =
225 (* lprintf "CLEAR FILTER"; lprint_newline (); *)
226 let rec iter node =
227 let len = Array.length node.nodes in
228 for i = 0 to len - 1 do
229 match node.nodes.(i) with
230 Filtered n ->
231 node.nodes.(i) <- Some n;
232 filter_node n false;
233 iter_in n
234 | Some n -> iter n
235 | _ -> ()
236 done
238 and iter_in node =
239 let len = Array.length node.nodes in
240 for i = 0 to len - 1 do
241 match node.nodes.(i) with
242 Filtered n ->
243 node.nodes.(i) <- Some n;
244 filter_node n false;
245 iter_in n
246 | Some n ->
247 filter_node n false;
248 iter_in n
249 | _ -> ()
250 done
252 iter index.node
254 let filtered doc = Doc.filtered doc
257 let find node s =
258 let len = String.length s in
259 let rec iter node pos =
260 if pos = len then node else
261 let c = s.[pos] in
262 let c = convert_char c in
263 if Array.length node.nodes > c then
264 match node.nodes.(c) with
265 None -> raise Not_found
266 | Some node -> iter node (pos+1)
267 | Filtered _ -> raise Not_found
268 else raise Not_found
271 iter node.node 0
272 with _ -> empty
274 let or_get_fields map node fields =
275 let rec iter node =
276 for i = 0 to node.next_doc - 1 do
277 if node.fields.(i) land fields <> 0 then
278 let doc = node.docs.(i) in
279 if not (Doc.filtered doc) &&
280 not (Intmap.mem (Doc.num doc) !map) then
281 map := Intmap.add (Doc.num doc) doc !map
282 done;
283 for i = 0 to Array.length node.nodes - 1 do
284 match node.nodes.(i) with
285 None -> ()
286 | Some node -> iter node
287 | Filtered _ -> ()
288 done;
289 in iter node;
290 !map
292 let and_get_fields node fields and_map =
293 let map = ref Intmap.empty in
294 let rec iter node =
295 for i = 0 to node.next_doc - 1 do
296 if node.fields.(i) land fields <> 0 then
297 let doc = node.docs.(i) in
298 if (Intmap.mem (Doc.num doc) and_map) &&
299 not (Intmap.mem (Doc.num doc) !map) then
300 map := Intmap.add (Doc.num doc) doc !map
301 done;
302 for i = 0 to Array.length node.nodes - 1 do
303 match node.nodes.(i) with
304 None -> ()
305 | Some node -> iter node
306 | Filtered _ -> ()
307 done;
308 in iter node;
309 !map
311 let size node = node.ndocs
315 module FullMake (Doc : Indexer.Doc) = Indexer.FullMake (Doc ) (Make)