fix some "deprecated" warnings
[mldonkey.git] / src / utils / lib / indexer.ml
blobc80f4494b27717986b3e3aa765d8fe6eddc34cba
1 (* Copyright 2001, 2002 b8_bavard, b8_fee_carabine, INRIA *)
2 (*
3 This file is part of mldonkey.
5 mldonkey is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 mldonkey is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with mldonkey; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 (* We need another indexer that would be able to compute lazily
21 the results of a search, as in Haskell. We would return the first 200
22 hits, then compute the other ones only if the client asks for. *)
24 let max_search_results = 1000
26 type 'a query =
27 And of 'a query * 'a query
28 | Or of 'a query * 'a query
29 | AndNot of 'a query * 'a query
30 | HasWord of string
31 | HasField of int * string
33 | HasMinField of int * int32
34 | HasMaxField of int * int32
36 | Predicate of ('a -> bool)
39 module type Doc = sig
40 type t
42 val num : t -> int
43 val filtered : t -> bool
44 val filter : t -> bool -> unit
45 end
48 module type Index = sig
49 type index
50 type node
51 type doc
52 val or_get_fields : doc Intmap.t ref -> node -> int -> doc Intmap.t
53 val find : index -> string -> node
54 val and_get_fields : node -> int -> doc Intmap.t -> doc Intmap.t
55 val size : node -> int
57 val stats : index -> int
58 (*
59 val min_field : int -> int32 -> doc Intmap.t
60 val max_field : int -> int32 -> doc Intmap.t
61 *)
62 end
65 (**************** SEARCH AND COMPLEX QUERIES ****************)
68 module QueryMake(Index: Index) = struct
70 (* AND entre plusieurs mots, avec un champ specifie par mot *)
72 open Index
74 let get_fields tree fields =
75 (* lprintf "get_fields"; lprint_newline (); *)
76 let map = ref Intmap.empty in
77 or_get_fields map tree fields
81 let complex_request idx req =
82 try
83 let trees = List.map (fun (s,fields) ->
84 (* lprintf "FIND [%s]" s; lprint_newline (); *)
85 s, find idx s, fields
86 ) req in
87 match trees with
88 [] -> []
89 | (s, tree, fields) :: tail ->
90 let hits = get_fields tree fields in
91 let hits = List.fold_left (fun hits (s, tree, fields) ->
92 (* lprintf "AND GET FIELD on [%s]" s; lprint_newline (); *)
93 and_get_fields tree fields hits) hits tail in
94 let list = ref [] in
95 Intmap.iter (fun _ doc ->
96 if not (Doc.filtered doc) then
97 list := doc :: !list) hits;
98 (* lprintf "FOUND %d documents" (List.length !list);
99 lprint_newline (); *)
100 !list
101 with Not_found ->
102 (* lprintf "Not found"; lprint_newline (); *)
106 let purge_map f map =
107 let new_map = ref Intmap.empty in
108 Intmap.iter (fun num doc ->
109 if f doc then
110 new_map := Intmap.add num doc !new_map) map;
111 !new_map
113 let merge_maps map1 map2 =
114 if map1 = Intmap.empty then map2 else
115 let new_map = ref map1 in
116 Intmap.iter (fun num doc ->
117 if not (Intmap.mem num !new_map) then
118 new_map := Intmap.add num doc !new_map) map2;
119 !new_map
121 let substract_map map map_not =
122 if map_not = Intmap.empty then map else
123 let new_map = ref Intmap.empty in
124 Intmap.iter (fun num doc ->
125 if not (Intmap.mem num map_not) then
126 new_map := Intmap.add num doc !new_map) map;
127 !new_map
129 let rec query_map idx q =
131 match q with
132 | HasWord s ->
133 let tree = find idx s in
134 get_fields tree (-1)
135 | HasField (fields, s) ->
136 (* lprintf "query_map %d %s" fields s; lprint_newline (); *)
137 let tree = find idx s in
138 get_fields tree fields
139 | And ((Predicate _) as q2, q1)
140 | And (q1, q2) ->
141 let map = query_map idx q1 in
142 and_query idx q2 map
143 | Or (q1, q2) ->
144 let map = query_map idx q1 in
145 or_query idx q2 map
146 | AndNot (q1, q2) ->
147 let map = query_map idx q1 in
148 andnot_query idx q2 map
149 | Predicate f ->
150 Intmap.empty
151 with _ -> Intmap.empty
153 and and_query idx q map =
155 if map = Intmap.empty then map else
156 match q with
157 | HasWord s ->
158 let tree = find idx s in
159 and_get_fields tree (-1) map
160 | HasField (fields, s) ->
161 (* lprintf "and_query %d %s" fields s; lprint_newline (); *)
162 let tree = find idx s in
163 and_get_fields tree fields map
164 | And (q1, q2) ->
165 let map = and_query idx q1 map in
166 and_query idx q2 map
167 | Or (q1, q2) ->
168 let map1 = and_query idx q1 map in
169 let map2 = and_query idx q2 map in
170 merge_maps map1 map2
171 | AndNot (q1, q2) ->
172 let map = and_query idx q1 map in
173 andnot_query idx q2 map
174 | Predicate f ->
175 purge_map f map
177 with _ -> Intmap.empty
179 and or_query idx q map =
181 match q with
182 | HasWord s ->
183 let tree = find idx s in
184 or_get_fields (ref map) tree (-1)
185 | HasField (fields, s) ->
186 let tree = find idx s in
187 or_get_fields (ref map) tree fields
188 | And (Predicate _ as q2, q1)
189 | And (q1, q2) ->
190 let map_and = query_map idx q1 in
191 let map_and = and_query idx q2 map_and in
192 merge_maps map map_and
193 | Or (q1, q2) ->
194 let map = or_query idx q1 map in
195 or_query idx q2 map
196 | AndNot (q1, q2) ->
197 let map_andnot = query_map idx q1 in
198 let map_andnot = andnot_query idx q2 map_andnot in
199 merge_maps map map_andnot
200 | Predicate f ->
203 with _ -> Intmap.empty
205 and andnot_query idx q map =
206 match q with
207 | HasWord s ->
208 let tree = find idx s in
209 let map_not = get_fields tree (-1) in
210 substract_map map map_not
211 | HasField (fields, s) ->
212 let tree = find idx s in
213 let map_not = get_fields tree fields in
214 substract_map map map_not
215 | And (Predicate _ as q2, q1)
216 | And (q1, q2) ->
217 let map_not = query_map idx q1 in
218 let map_not = and_query idx q2 map_not in
219 substract_map map map_not
220 | Or(q, Predicate f)
221 | Or(Predicate f, q) ->
222 let map = purge_map f map in
223 andnot_query idx q map
224 | Or (q1, q2) ->
225 let map_not = query_map idx q1 in
226 let map = substract_map map map_not in
227 let map_not = query_map idx q2 in
228 substract_map map map_not
229 | AndNot (q1, q2) ->
230 let map_not = query_map idx q1 in
231 let map_not = andnot_query idx q2 map_not in
232 substract_map map map_not
233 | Predicate f ->
234 purge_map (fun x -> not (f x)) map
236 let exit_exn= Exit
238 let rec simplify q =
239 match q with
240 And (q1, q2) ->
241 let q1 = simplify q1 in
242 let q2 = simplify q2 in
243 simplify1 (And (q1, q2))
244 | Or (q1, q2) ->
245 let q1 = simplify q1 in
246 let q2 = simplify q2 in
247 simplify1 (Or (q1, q2))
248 | AndNot (q1, q2) ->
249 let q1 = simplify q1 in
250 let q2 = simplify q2 in
251 simplify1 (AndNot (q1, q2))
252 | Predicate _
253 | HasField _
254 | HasWord _ -> q
256 and simplify1 q =
257 match q with
258 And (Predicate f1, Predicate f2) ->
259 Predicate (fun x -> f1 x && f2 x)
260 | AndNot (Predicate f1, Predicate f2) ->
261 Predicate (fun x -> f1 x && not (f2 x))
262 | And (AndNot (q1, q2), q3) ->
263 simplify1 (AndNot ( simplify1 (And (q1, q3)), q2))
264 | And (Predicate f, q) -> And (q, Predicate f)
265 | Or(q1, Or(q2,q3)) ->
266 simplify1 (Or (simplify1 (Or(q1,q2)), q3))
267 | And (q1, And(q2, q3)) ->
268 simplify1 (And (simplify1 (And(q1, q2)), q3))
269 | And (Or(q1,q2), q3) ->
270 simplify1 (Or(simplify1 (And(q1,q3)), simplify1 (And(q2,q3))))
271 | _ -> q
273 let query idx q =
274 let map = query_map idx (simplify q) in
275 let count = ref 0 in
276 let ele = ref None in
277 Intmap.iter (fun num doc ->
278 incr count;
279 if !count = 1 then ele := Some doc
280 ) map;
282 match !ele with
283 None -> [||]
284 | Some doc ->
285 let max_docs = min !count max_search_results in
286 let array = Array.make max_docs doc in
287 count := 0;
288 (try
289 Intmap.iter (fun num doc ->
290 if !count = max_docs then raise exit_exn;
291 array.(!count) <- doc;
292 incr count;
293 ) map;
294 with _ -> ());
295 array
298 module type Make = functor (Doc: Doc) ->
299 sig
300 type index
301 val create : unit -> index
303 val add : index -> string -> Doc.t -> int -> unit
305 val clear : index -> unit
307 val filter_words : index -> string list -> unit
308 val clear_filter : index -> unit
309 val filtered : Doc.t -> bool
311 type doc = Doc.t
312 type node
313 val or_get_fields : Doc.t Intmap.t ref -> node -> int -> Doc.t Intmap.t
314 val find : index -> string -> node
315 val and_get_fields : node -> int -> Doc.t Intmap.t -> Doc.t Intmap.t
316 val size : node -> int
318 val stats : index -> int
322 module FullMake (Doc : Doc)(Make:Make) =
323 struct
324 module Index = Make(Doc)
325 module Query = QueryMake(Index)
327 type index = Index.index
329 let query = Query.query
330 let query_map = Query.query_map
331 let filtered = Index.filtered
332 let clear_filter = Index.clear_filter
333 let filter_words = Index.filter_words
334 let clear = Index.clear
335 let add = Index.add
336 let create = Index.create
337 let stats = Index.stats