registry.el (registry-usage-test): Disable pruning test.
[emacs.git] / lisp / gnus / registry.el
blobb5cc3ec0e2ba6425856e7204e1794ad7206e9281
1 ;;; registry.el --- Track and remember data items by various fields
3 ;; Copyright (C) 2011 Free Software Foundation, Inc.
5 ;; Author: Teodor Zlatanov <tzz@lifelogs.com>
6 ;; Keywords: data
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23 ;;; Commentary:
25 ;; This library provides a general-purpose EIEIO-based registry
26 ;; database with persistence, initialized with these fields:
28 ;; version: a float, 0.1 currently (don't change it)
30 ;; max-hard: an integer, default 5000000
32 ;; max-soft: an integer, default 50000
34 ;; precious: a list of symbols
36 ;; tracked: a list of symbols
38 ;; tracker: a hashtable tuned for 100 symbols to track (you should
39 ;; only access this with `registry-lookup-secondary' and
40 ;; `registry-lookup-secondary-value')
42 ;; data: a hashtable with default size 10K and rehash size 2.0
43 ;; (this reflects the expected usage so override it if you know better)
45 ;; ...plus methods to do all the work: `registry-search',
46 ;; `registry-lookup', `registry-lookup-secondary',
47 ;; `registry-lookup-secondary-value', `registry-insert',
48 ;; `registry-delete', `registry-prune', `registry-size' which see
50 ;; and with the following properties:
52 ;; Every piece of data has a unique ID and some general-purpose fields
53 ;; (F1=D1, F2=D2, F3=(a b c)...) expressed as an alist, e.g.
55 ;; ((F1 D1) (F2 D2) (F3 a b c))
57 ;; Note that whether a field has one or many pieces of data, the data
58 ;; is always a list of values.
60 ;; The user decides which fields are "precious", F2 for example. At
61 ;; PRUNE TIME (when `registry-prune' is called), the registry will
62 ;; trim any entries without the F2 field until the size is :max-soft
63 ;; or less. No entries with the F2 field will be removed at PRUNE
64 ;; TIME.
66 ;; When an entry is inserted, the registry will reject new entries
67 ;; if they bring it over the max-hard limit, even if they have the F2
68 ;; field.
70 ;; The user decides which fields are "tracked", F1 for example. Any
71 ;; new entry is then indexed by all the tracked fields so it can be
72 ;; quickly looked up that way. The data is always a list (see example
73 ;; above) and each list element is indexed.
75 ;; Precious and tracked field names must be symbols. All other
76 ;; fields can be any other Emacs Lisp types.
78 ;;; Code:
80 (eval-when-compile (require 'cl))
82 (eval-when-compile
83 (when (null (ignore-errors (require 'ert)))
84 (defmacro* ert-deftest (name () &body docstring-keys-and-body))))
86 (ignore-errors
87 (require 'ert))
88 (eval-and-compile
89 (or (ignore-errors (progn
90 (require 'eieio)
91 (require 'eieio-base)))
92 ;; gnus-fallback-lib/ from gnus/lisp/gnus-fallback-lib
93 (ignore-errors
94 (let ((load-path (cons (expand-file-name
95 "gnus-fallback-lib/eieio"
96 (file-name-directory (locate-library "gnus")))
97 load-path)))
98 (require 'eieio)
99 (require 'eieio-base)))
100 (error
101 "eieio not found in `load-path' or gnus-fallback-lib/ directory.")))
;; The registry database class.  It inherits persistence from
;; `eieio-persistent'.  The `tracker' and `data' slots have no
;; :initform here, so they stay unbound unless a value is supplied
;; through their initargs or assigned after construction.
(defclass registry-db (eieio-persistent)
  ((version
    :type float
    :custom float
    :initform 0.1
    :initarg :version
    :documentation "The registry version.")
   (max-hard
    :type integer
    :custom integer
    :initform 5000000
    :initarg :max-hard
    :documentation "Never accept more than this many elements.")
   (max-soft
    :type integer
    :custom integer
    :initform 50000
    :initarg :max-soft
    :documentation "Prune as much as possible to get to this size.")
   (tracked
    :type t
    :initform nil
    :initarg :tracked
    :documentation "The tracked (indexed) fields, a list of symbols.")
   (precious
    :type t
    :initform nil
    :initarg :precious
    :documentation "The precious fields, a list of symbols.")
   (tracker
    :type hash-table
    :initarg :tracker
    :documentation "The field tracking hashtable.")
   (data
    :type hash-table
    :initarg :data
    :documentation "The data hashtable.")))
134 (eval-and-compile
(defmethod initialize-instance :AFTER ((this registry-db) slots)
  "Give THIS fresh hash tables for the table slots missing from SLOTS.
SLOTS is the initarg list passed to the constructor.  When it does not
mention :data, the data slot gets a new `equal' hash table; when it
does not mention :tracker, the tracker slot gets a new hash table."
  ;; An explicitly supplied table is left untouched.
  (or (member :data slots)
      (oset this data
            (make-hash-table :size 10000 :rehash-size 2.0 :test 'equal)))
  (or (member :tracker slots)
      (oset this tracker
            (make-hash-table :size 100 :rehash-size 2.0))))
(defmethod registry-lookup ((db registry-db) keys)
  "Return the entries stored under KEYS in the registry-db DB.
The result has one (KEY ENTRY) element -- a two-element list, not a
cons cell -- for each key in KEYS that has a non-nil entry, in the
same order as KEYS.  Keys without an entry are left out."
  (let ((table (oref db :data)))
    (delq nil
          (mapcar
           (lambda (k)
             ;; nil for a miss; those are stripped by the `delq' above.
             (let ((entry (gethash k table)))
               (and entry (list k entry))))
           keys))))
(defmethod registry-lookup-breaks-before-lexbind ((db registry-db) keys)
  "Return the entries stored under KEYS in the registry-db DB.
Same contract as `registry-lookup': a list of (KEY ENTRY) two-element
lists, one per key in KEYS that has a non-nil entry.  This variant is
deliberately written with the `loop' macro."
  (let ((table (oref db :data)))
    (delq nil
          (loop for k in keys
                for entry = (gethash k table)
                if entry
                collect (list k entry)))))
(defmethod registry-lookup-secondary ((db registry-db) tracksym
                                      &optional create)
  "Return the secondary index hashtable for TRACKSYM in registry-db DB.
TRACKSYM is a tracked field symbol.  If DB already has an index for
it, that hashtable is returned unchanged.  Otherwise, when CREATE is
non-nil, a new empty `equal' hashtable is stored under TRACKSYM in the
tracker and returned; when CREATE is nil the result is nil."
  (let ((tracker (oref db :tracker)))
    ;; An existing index must win: creating over it would drop every
    ;; value indexed so far.
    (or (gethash tracksym tracker)
        (when create
          ;; `puthash' returns the stored value, i.e. the new index.
          (puthash tracksym
                   (make-hash-table :size 800 :rehash-size 2.0 :test 'equal)
                   tracker)))))
(defmethod registry-lookup-secondary-value ((db registry-db) tracksym val
                                            &optional set)
  "Return what the TRACKSYM index of registry-db DB holds for VAL.
When SET is non-nil it is first stored as the new value for VAL,
creating the TRACKSYM index if needed; pass t as SET to store an
empty list.  When SET is nil and `registry-lookup-secondary' finds
no TRACKSYM index, return nil."
  (cond
   ;; Asked to store: write through the (possibly new) index, then
   ;; read the value back.
   (set
    (puthash val
             (if (eq set t) nil set)
             (registry-lookup-secondary db tracksym t))
    (gethash val (registry-lookup-secondary db tracksym)))
   ;; Read only: needs an existing index.
   ((registry-lookup-secondary db tracksym)
    (gethash val (registry-lookup-secondary db tracksym)))))
) ; closes the `eval-and-compile' form that wraps the methods above
(defun registry--match (mode entry check-list)
  "Return non-nil if ENTRY matches any element of CHECK-LIST under MODE.
ENTRY is an alist of (KEY DATA...) elements and CHECK-LIST is a list
of (KEY VAL...) elements.  Elements of CHECK-LIST are tried in order
and, within one element, each VAL in order; the first successful
check's value is returned.

With MODE :member a VAL matches when it is `member' of ENTRY's data
for KEY.  With MODE :regex a VAL is a regexp matched against the
`prin1' representations of ENTRY's data for KEY joined by NUL
characters.  Any other MODE never matches."
  (let (found)
    (while (and check-list (not found))
      (let ((key (car (car check-list)))
            (vals (cdr-safe (car check-list))))
        ;; Try each value of this check until one matches.
        (while (and key vals (not found))
          (setq found
                (cond
                 ((eq mode :member)
                  (member (car-safe vals) (cdr-safe (assoc key entry))))
                 ((eq mode :regex)
                  (string-match (car vals)
                                (mapconcat
                                 'prin1-to-string
                                 (cdr-safe (assoc key entry))
                                 "\0"))))
                vals (cdr-safe vals))))
      (setq check-list (cdr-safe check-list)))
    found))
209 (eval-and-compile
(defmethod registry-search ((db registry-db) &rest spec)
  "Return the keys of all entries in the registry-db DB that match SPEC.
SPEC is a property list with any of these properties:

 :all     any non-nil value matches every entry.
 :member  a list of (FIELD VAL...) checks; for example the value
          ((a 1 2)) matches the entry ((a 3 1)).
 :regex   a list of (FIELD REGEXP...) checks; for example the value
          ((a \"h.llo\")) matches the entry ((a \"hullo\" \"bye\")).

The test order is to check :all first, then :member, then :regex.
With no matching property at all the result is nil."
  (when db
    (let ((all (plist-get spec :all))
          (member-spec (plist-get spec :member))
          (regex-spec (plist-get spec :regex)))
      (loop for k being the hash-keys of (oref db :data)
            using (hash-values v)
            when (or
                  ;; :all non-nil returns all
                  all
                  ;; member matching
                  (and member-spec
                       (registry--match :member v member-spec))
                  ;; regex matching
                  (and regex-spec
                       (registry--match :regex v regex-spec)))
            collect k))))
(defmethod registry-delete ((db registry-db) keys assert &rest spec)
  "Remove entries from the registry-db DB and return the removed keys.
KEYS lists the keys to remove; when it is nil the keys are found by
passing SPEC to `registry-search'.  Each removed key is also taken
out of the secondary ('tracked') indices.  When ASSERT is non-nil,
signal an error for a key that has no entry."
  (let* ((table (oref db :data))
         (doomed (or keys
                     (apply 'registry-search db spec)))
         (fields (oref db :tracked)))
    (dolist (id doomed)
      (let ((record (gethash id table)))
        (if assert
            (assert record nil
                    "Key %s does not exists in database" id))
        ;; Unhook the key from every index that exists for a tracked
        ;; field.
        (dolist (field fields)
          (and (registry-lookup-secondary db field)
               ;; Visit each value the entry holds for this field.
               (dolist (val (cdr-safe (assq field record)))
                 (let ((holders (registry-lookup-secondary-value
                                 db field val)))
                   (if (member id holders)
                       ;; Store the key list without this key; t
                       ;; stands for the empty list.
                       (registry-lookup-secondary-value
                        db field val
                        (or (delete id holders) t)))))))
        (remhash id table)))
    doomed))
(defmethod registry-full ((db registry-db))
  "Return non-nil when the registry-db DB has reached its hard limit.
That is, when `registry-size' of DB is at least its :max-hard value."
  (let ((count (registry-size db))
        (cap (oref db :max-hard)))
    (<= cap count)))
(defmethod registry-insert ((db registry-db) key entry)
  "Insert ENTRY under KEY into the registry-db DB and return ENTRY.
Updates the secondary ('tracked') indices as well.
Errors out if the key exists already, or if DB is already full
according to `registry-full'."

  ;; The second argument of `assert' is SHOW-ARGS; it must be given
  ;; (as nil) for the string to be used as the error message.
  (assert (not (gethash key (oref db :data))) nil
          "Key already exists in database")

  (assert (not (registry-full db)) nil
          "registry max-hard size limit reached")

  ;; store the entry
  (puthash key entry (oref db :data))

  ;; store the secondary indices
  (dolist (tr (oref db :tracked))
    ;; for every value in the entry under that key...
    (dolist (val (cdr-safe (assq tr entry)))
      (let ((value-keys (registry-lookup-secondary-value db tr val)))
        ;; add KEY to the keys indexed under VAL, without duplicates
        (pushnew key value-keys :test 'equal)
        (registry-lookup-secondary-value db tr val value-keys))))
  entry)
(defmethod registry-reindex ((db registry-db))
  "Rebuild the secondary indices of registry-db DB.
For every tracked field, walk all entries and add each entry's key to
the index of every value the entry holds for that field.  Keys are
pushed onto whatever the index already holds; nothing is cleared
first.  A progress message is shown every 1000 steps."
  (let ((count 0)
        (expected (* (length (oref db :tracked)) (registry-size db))))
    (dolist (tr (oref db :tracked))
      (maphash
       (lambda (key v)
         (incf count)
         (when (and (< 0 expected)
                    (= 0 (mod count 1000)))
           ;; 100.0 forces float division so %.2f shows real decimals
           ;; instead of a truncated integer percentage.
           (message "reindexing: %d of %d (%.2f%%)"
                    count expected (/ (* 100.0 count) expected)))
         (dolist (val (cdr-safe (assq tr v)))
           (let ((value-keys (registry-lookup-secondary-value db tr val)))
             (push key value-keys)
             (registry-lookup-secondary-value db tr val value-keys))))
       (oref db :data)))))
(defmethod registry-size ((db registry-db))
  "Return the number of entries in the registry-db DB.
This is the key count of the hashtable in its :data slot."
  (let ((table (oref db :data)))
    (hash-table-count table)))
(defmethod registry-prune ((db registry-db) &optional sortfun)
  "Prune the registry-db DB and return the number of deleted entries.
Two passes are made.  The first uses the candidates and limit from
`registry-prune-soft-candidates', the second those from
`registry-prune-hard-candidates'.  In each pass, if DB holds more
entries than the pass's limit, the excess is deleted from the front
of that pass's candidate list, as far as candidates are available.

If SORTFUN is given, the candidates are sorted with it first, so the
entries that sort *higher* are the ones kept.
SORTFUN is passed only the two keys so it must look them up directly."
  (let ((deleted 0))
    (dolist (collector '(registry-prune-soft-candidates
                         registry-prune-hard-candidates))
      (let* ((size (registry-size db))
             (collected (funcall collector db))
             (limit (nth 0 collected))
             (candidates (nth 1 collected))
             ;; how many entries this pass has to get rid of
             (excess (- size limit)))
        (when (and candidates (> excess 0))
          ;; sort the candidates if SORTFUN was given
          (when sortfun
            (setq candidates (sort candidates sortfun)))
          ;; Keep only the first EXCESS candidates: those are the
          ;; ones to delete, the rest of the list survives.
          (let ((surplus (- (length candidates) excess)))
            (when (> surplus 0)
              (setq candidates (butlast candidates surplus))))
          (registry-delete db candidates nil)
          (setq deleted (+ deleted (length candidates))))))
    deleted))
(defmethod registry-prune-soft-candidates ((db registry-db))
  "Collect soft pruning candidates from the registry-db DB.
Return a list (LIMIT CANDIDATES) where LIMIT is the :max-soft value
of DB and CANDIDATES are the keys of all entries that have no
precious field.  A field counts as precious only when its name is in
the :precious list of DB and it actually carries data, so a precious
field name with an empty value list does not protect an entry."
  (let* ((precious (oref db :precious))
         (precious-p (lambda (entry-key)
                       ;; ENTRY-KEY is one (FIELD DATA...) element of
                       ;; an entry.  The verdict must not depend on
                       ;; where FIELD sits in the precious list.
                       (and (memq (car-safe entry-key) precious)
                            (cdr-safe entry-key))))
         (data (oref db :data))
         (limit (oref db :max-soft))
         (candidates (loop for k being the hash-keys of data
                           using (hash-values v)
                           when (notany precious-p v)
                           collect k)))
    (list limit candidates)))
(defmethod registry-prune-hard-candidates ((db registry-db))
  "Collect hard pruning candidates from the registry-db DB.
Return a list (LIMIT CANDIDATES) where LIMIT is the :max-hard value
of DB minus 10 and CANDIDATES are the keys of every entry in DB."
  (let* ((table (oref db :data))
         ;; Aim 10 below the hard limit so that pruning is not
         ;; needed again right away.
         (threshold (- (oref db :max-hard) 10))
         all-keys)
    (maphash (lambda (k ignored-value)
               (push k all-keys))
             table)
    (list threshold (nreverse all-keys))))
) ; closes the `eval-and-compile' form that wraps the methods above
(ert-deftest registry-instantiation-test ()
  "A registry-db can be constructed from just a name."
  (let ((db (registry-db "Testing")))
    (should db)))
(ert-deftest registry-match-test ()
  "Check `registry--match' in :regex and :member mode on one entry."
  (let ((entry '((hello "goodbye" "bye") (blank))))

    (message "Testing :regex matching")
    ;; check lists that must match, then those that must not
    (dolist (checks '(((hello "nye" "bye"))
                      ((hello "good"))))
      (should (registry--match :regex entry checks)))
    (dolist (checks '(((hello "nye"))
                      ((hello))))
      (should-not (registry--match :regex entry checks)))

    (message "Testing :member matching")
    (dolist (checks '(((hello "bye"))
                      ((hello "goodbye"))))
      (should (registry--match :member entry checks)))
    (dolist (checks '(((hello "good"))
                      ((hello "nye"))
                      ((hello))))
      (should-not (registry--match :member entry checks))))
  (message "Done with matching testing."))
(defun registry-make-testable-db (n &optional name file)
  "Return a registry-db filled with N generated entries for testing.
The database is named NAME (default \"Testing\") and uses FILE
\(default \"unused\") as its file.  Its :max-hard is N and its
:max-soft is 0.  Entries are stored under the integer keys 0 to
N-1; only keys greater than 5 get an `extra' field."
  (let ((db (registry-db
             (or name "Testing")
             :file (or file "unused")
             :max-hard n
             :max-soft 0                ; keep nothing not precious
             :precious '(extra more-extra)
             :tracked '(sender subject groups)))
        (i 0))
    (while (< i n)
      ;; keys 0 through 5 carry no `extra' data
      (let ((extra (when (< 5 i)
                     (list (list 'extra "more data")))))
        (registry-insert db i `((sender "me")
                                (subject "about you")
                                (more-extra) ; empty data key should be pruned
                                ,@extra
                                (groups ,(number-to-string i)))))
      (setq i (1+ i)))
    db))
(ert-deftest registry-usage-test ()
  "Exercise size, the hard limit, lookup, search, indices and delete.
The pruning checks are disabled."
  (let* ((n 100)
         (db (registry-make-testable-db n))
         (probe '(1 58 99)))
    (message "size %d" n)
    (should (= n (registry-size db)))
    (message "max-hard test")
    (should-error (registry-insert db "new" '()))
    (message "Individual lookup")
    ;; the key of the second result
    (should (= 58 (car (nth 1 (registry-lookup db probe)))))
    (message "Grouped individual lookup")
    (should (= 3 (length (registry-lookup db probe))))
    (when (boundp 'lexical-binding)
      (message "Individual lookup (breaks before lexbind)")
      (should (= 58
                 (car (nth 1 (registry-lookup-breaks-before-lexbind
                              db probe)))))
      (message "Grouped individual lookup (breaks before lexbind)")
      (should (= 3
                 (length (registry-lookup-breaks-before-lexbind
                          db probe)))))
    (message "Search")
    (should (= n (length (registry-search db :all t))))
    (should (= n (length (registry-search db :member '((sender "me"))))))
    (message "Secondary index search")
    (should (= n (length (registry-lookup-secondary-value db 'sender "me"))))
    (should (equal '(74) (registry-lookup-secondary-value db 'groups "74")))
    (message "Delete")
    (should (registry-delete db '(1) t))
    ;; one entry fewer from here on
    (setq n (1- n))
    (message "Search after delete")
    (should (= n (length (registry-search db :all t))))
    (message "Secondary search after delete")
    (should (= n (length (registry-lookup-secondary-value db 'sender "me"))))
    ;; (message "Pruning")
    ;; (let* ((tokeep (registry-search db :member '((extra "more data"))))
    ;;        (count (- n (length tokeep)))
    ;;        (pruned (registry-prune db))
    ;;        (prune-count (length pruned)))
    ;;   (message "Expecting to prune %d entries and pruned %d"
    ;;            count prune-count)
    ;;   (should (and (= count 5)
    ;;                (= count prune-count))))
    (message "Done with usage testing.")))
(ert-deftest registry-persistence-test ()
  "Save a test registry to a temporary file and read it back.
Checks that the saved file is not empty and starts with the expected
header, and that the object read back has as many entries as the
one saved.  The temporary file is removed even when a check fails."
  (let* ((n 100)
         (tempfile (make-temp-file "registry-persistence-"))
         (name "persistence tester")
         size back)
    ;; Everything that can fail after the temp file exists runs inside
    ;; `unwind-protect', so a failing `should' does not leave it behind.
    (unwind-protect
        (let ((db (registry-make-testable-db n name tempfile)))
          (message "Saving to %s" tempfile)
          (eieio-persistent-save db)
          ;; element 7 of the file attributes is the size in bytes
          (setq size (nth 7 (file-attributes tempfile)))
          (message "Saved to %s: size %d" tempfile size)
          (should (< 0 size))
          (with-temp-buffer
            (insert-file-contents-literally tempfile)
            (should (looking-at (concat ";; Object "
                                        name
                                        "\n;; EIEIO PERSISTENT OBJECT"))))
          (message "Reading object back")
          (setq back (eieio-persistent-read tempfile))
          (should back)
          (message "Read object back: %d keys, expected %d==%d"
                   (registry-size back) n (registry-size db))
          (should (= (registry-size back) n))
          (should (= (registry-size back) (registry-size db))))
      (when (file-exists-p tempfile)
        (delete-file tempfile))))
  (message "Done with persistence testing."))
475 (provide 'registry)
476 ;;; registry.el ends here