1 ;;; registry.el --- Track and remember data items by various fields
3 ;; Copyright (C) 2011 Free Software Foundation, Inc.
5 ;; Author: Teodor Zlatanov <tzz@lifelogs.com>
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
25 ;; This library provides a general-purpose EIEIO-based registry
26 ;; database with persistence, initialized with these fields:
28 ;; version: a float, 0.1 currently (don't change it)
30 ;; max-hard: an integer, default 5000000
32 ;; max-soft: an integer, default 50000
34 ;; precious: a list of symbols
36 ;; tracked: a list of symbols
38 ;; tracker: a hashtable tuned for 100 symbols to track (you should
39 ;; only access this with the :lookup2-function and the
40 ;; :lookup2+-function)
42 ;; data: a hashtable with default size 10K and resize threshold 2.0
43 ;; (this reflects the expected usage so override it if you know better)
45 ;; ...plus methods to do all the work: `registry-search',
46 ;; `registry-lookup', `registry-lookup-secondary',
47 ;; `registry-lookup-secondary-value', `registry-insert',
48 ;; `registry-delete', `registry-prune', `registry-size' which see
50 ;; and with the following properties:
52 ;; Every piece of data has a unique ID and some general-purpose fields
53 ;; (F1=D1, F2=D2, F3=(a b c)...) expressed as an alist, e.g.
55 ;; ((F1 D1) (F2 D2) (F3 a b c))
57 ;; Note that whether a field has one or many pieces of data, the data
58 ;; is always a list of values.
60 ;; The user decides which fields are "precious", F2 for example. At
61 ;; PRUNE TIME (when the :prune-function is called), the registry will
62 ;; trim any entries without the F2 field until the size is :max-soft
63 ;; or less. No entries with the F2 field will be removed at PRUNE
66 ;; When an entry is inserted, the registry will reject new entries
67 ;; if they bring it over the max-hard limit, even if they have the F2
70 ;; The user decides which fields are "tracked", F1 for example. Any
71 ;; new entry is then indexed by all the tracked fields so it can be
72 ;; quickly looked up that way. The data is always a list (see example
73 ;; above) and each list element is indexed.
75 ;; Precious and tracked field names must be symbols. All other
76 ;; fields can be any other Emacs Lisp types.
80 (eval-when-compile (require 'cl
))
83 (when (null (ignore-errors (require 'ert
)))
84 (defmacro* ert-deftest
(name () &body docstring-keys-and-body
))))
89 (or (ignore-errors (progn
91 (require 'eieio-base
)))
92 ;; gnus-fallback-lib/ from gnus/lisp/gnus-fallback-lib
94 (let ((load-path (cons (expand-file-name
95 "gnus-fallback-lib/eieio"
96 (file-name-directory (locate-library "gnus")))
99 (require 'eieio-base
)))
101 "eieio not found in `load-path' or gnus-fallback-lib/ directory.")))
103 (defclass registry-db
(eieio-persistent)
104 ((version :initarg
:version
108 :documentation
"The registry version.")
109 (max-hard :initarg
:max-hard
113 :documentation
"Never accept more than this many elements.")
114 (max-soft :initarg
:max-soft
118 :documentation
"Prune as much as possible to get to this size.")
120 :initarg
:prune-factor
124 :documentation
"At the max-hard limit, prune size * this entries.")
125 (tracked :initarg
:tracked
128 :documentation
"The tracked (indexed) fields, a list of symbols.")
129 (precious :initarg
:precious
132 :documentation
"The precious fields, a list of symbols.")
133 (tracker :initarg
:tracker
135 :documentation
"The field tracking hashtable.")
138 :documentation
"The data hashtable.")))
141 (defmethod initialize-instance :AFTER
((this registry-db
) slots
)
142 "Set value of data slot of THIS after initialization."
143 (with-slots (data tracker
) this
144 (unless (member :data slots
)
146 (make-hash-table :size
10000 :rehash-size
2.0 :test
'equal
)))
147 (unless (member :tracker slots
)
148 (setq tracker
(make-hash-table :size
100 :rehash-size
2.0)))))
150 (defmethod registry-lookup ((db registry-db
) keys
)
151 "Search for KEYS in the registry-db THIS.
152 Returns a alist of the key followed by the entry in a list, not a cons cell."
153 (let ((data (oref db
:data
)))
157 (when (gethash k data
)
158 (list k
(gethash k data
))))
161 (defmethod registry-lookup-breaks-before-lexbind ((db registry-db
) keys
)
162 "Search for KEYS in the registry-db THIS.
163 Returns a alist of the key followed by the entry in a list, not a cons cell."
164 (let ((data (oref db
:data
)))
166 (loop for key in keys
167 when
(gethash key data
)
168 collect
(list key
(gethash key data
))))))
170 (defmethod registry-lookup-secondary ((db registry-db
) tracksym
172 "Search for TRACKSYM in the registry-db THIS.
173 When CREATE is not nil, create the secondary index hashtable if needed."
174 (let ((h (gethash tracksym
(oref db
:tracker
))))
179 (make-hash-table :size
800 :rehash-size
2.0 :test
'equal
)
181 (gethash tracksym
(oref db
:tracker
))))))
183 (defmethod registry-lookup-secondary-value ((db registry-db
) tracksym val
185 "Search for TRACKSYM with value VAL in the registry-db THIS.
186 When SET is not nil, set it for VAL (use t for an empty list)."
187 ;; either we're asked for creation or there should be an existing index
188 (when (or set
(registry-lookup-secondary db tracksym
))
189 ;; set the entry if requested,
191 (puthash val
(if (eq t set
) '() set
)
192 (registry-lookup-secondary db tracksym t
)))
193 (gethash val
(registry-lookup-secondary db tracksym
)))))
195 (defun registry--match (mode entry check-list
)
198 (let ((key (nth 0 (nth 0 check-list
)))
199 (vals (cdr-safe (nth 0 check-list
)))
201 (while (and key vals
(not found
))
202 (setq found
(case mode
204 (member (car-safe vals
) (cdr-safe (assoc key entry
))))
206 (string-match (car vals
)
209 (cdr-safe (assoc key entry
))
211 vals
(cdr-safe vals
)))
213 (registry--match mode entry
(cdr-safe check-list
))))))
216 (defmethod registry-search ((db registry-db
) &rest spec
)
217 "Search for SPEC across the registry-db THIS.
218 For example calling with :member '(a 1 2) will match entry '((a 3 1)).
219 Calling with :all t (any non-nil value) will match all.
220 Calling with :regex '\(a \"h.llo\") will match entry '((a \"hullo\" \"bye\").
221 The test order is to check :all first, then :member, then :regex."
223 (let ((all (plist-get spec
:all
))
224 (member (plist-get spec
:member
))
225 (regex (plist-get spec
:regex
)))
226 (loop for k being the hash-keys of
(oref db
:data
)
227 using
(hash-values v
)
229 ;; :all non-nil returns all
232 (and member
(registry--match :member v member
))
234 (and regex
(registry--match :regex v regex
)))
237 (defmethod registry-delete ((db registry-db
) keys assert
&rest spec
)
238 "Delete KEYS from the registry-db THIS.
239 If KEYS is nil, use SPEC to do a search.
240 Updates the secondary ('tracked') indices as well.
241 With assert non-nil, errors out if the key does not exist already."
242 (let* ((data (oref db
:data
))
244 (apply 'registry-search db spec
)))
245 (tracked (oref db
:tracked
)))
248 (let ((entry (gethash key data
)))
251 "Key %s does not exists in database" key
))
252 ;; clean entry from the secondary indices
254 ;; is this tracked symbol indexed?
255 (when (registry-lookup-secondary db tr
)
256 ;; for every value in the entry under that key...
257 (dolist (val (cdr-safe (assq tr entry
)))
258 (let* ((value-keys (registry-lookup-secondary-value
260 (when (member key value-keys
)
261 ;; override the previous value
262 (registry-lookup-secondary-value
264 ;; with the indexed keys MINUS the current key
265 ;; (we pass t when the list is empty)
266 (or (delete key value-keys
) t
)))))))
270 (defmethod registry-full ((db registry-db
))
271 "Checks if registry-db THIS is full."
272 (>= (registry-size db
)
273 (oref db
:max-hard
)))
275 (defmethod registry-insert ((db registry-db
) key entry
)
276 "Insert ENTRY under KEY into the registry-db THIS.
277 Updates the secondary ('tracked') indices as well.
278 Errors out if the key exists already."
280 (assert (not (gethash key
(oref db
:data
))) nil
281 "Key already exists in database")
283 (assert (not (registry-full db
))
285 "registry max-hard size limit reached")
288 (puthash key entry
(oref db
:data
))
290 ;; store the secondary indices
291 (dolist (tr (oref db
:tracked
))
292 ;; for every value in the entry under that key...
293 (dolist (val (cdr-safe (assq tr entry
)))
294 (let* ((value-keys (registry-lookup-secondary-value db tr val
)))
295 (pushnew key value-keys
:test
'equal
)
296 (registry-lookup-secondary-value db tr val value-keys
))))
299 (defmethod registry-reindex ((db registry-db
))
300 "Rebuild the secondary indices of registry-db THIS."
302 (expected (* (length (oref db
:tracked
)) (registry-size db
))))
303 (dolist (tr (oref db
:tracked
))
308 (when (and (< 0 expected
)
309 (= 0 (mod count
1000)))
310 (message "reindexing: %d of %d (%.2f%%)"
311 count expected
(/ (* 100 count
) expected
)))
312 (dolist (val (cdr-safe (assq tr v
)))
313 (let* ((value-keys (registry-lookup-secondary-value db tr val
)))
314 (push key value-keys
)
315 (registry-lookup-secondary-value db tr val value-keys
))))
318 (defmethod registry-size ((db registry-db
))
319 "Returns the size of the registry-db object THIS.
320 This is the key count of the :data slot."
321 (hash-table-count (oref db
:data
)))
323 (defmethod registry-prune ((db registry-db
) &optional sortfun
)
324 "Prunes the registry-db object THIS.
325 Removes only entries without the :precious keys if it can,
326 then removes oldest entries first.
327 Returns the number of deleted entries.
328 If SORTFUN is given, tries to keep entries that sort *higher*.
329 SORTFUN is passed only the two keys so it must look them up directly."
330 (dolist (collector '(registry-prune-soft-candidates
331 registry-prune-hard-candidates
))
332 (let* ((size (registry-size db
))
333 (collected (funcall collector db
))
334 (limit (nth 0 collected
))
335 (candidates (nth 1 collected
))
336 ;; sort the candidates if SORTFUN was given
337 (candidates (if sortfun
(sort candidates sortfun
) candidates
))
338 (candidates-count (length candidates
))
339 ;; are we over max-soft?
340 (prune-needed (> size limit
)))
342 ;; while we have more candidates than we need to remove...
343 (while (and (> candidates-count
(- size limit
)) candidates
)
344 (decf candidates-count
)
345 (setq candidates
(cdr candidates
)))
347 (registry-delete db candidates nil
)
348 (length candidates
))))
350 (defmethod registry-prune-soft-candidates ((db registry-db
))
351 "Collects pruning candidates from the registry-db object THIS.
352 Proposes only entries without the :precious keys."
353 (let* ((precious (oref db
:precious
))
354 (precious-p (lambda (entry-key)
355 (cdr (memq (car entry-key
) precious
))))
356 (data (oref db
:data
))
357 (limit (oref db
:max-soft
))
358 (candidates (loop for k being the hash-keys of data
359 using
(hash-values v
)
360 when
(notany precious-p v
)
362 (list limit candidates
)))
364 (defmethod registry-prune-hard-candidates ((db registry-db
))
365 "Collects pruning candidates from the registry-db object THIS.
366 Proposes any entries over the max-hard limit minus size * prune-factor."
367 (let* ((data (oref db
:data
))
368 ;; prune to (size * prune-factor) below the max-hard limit so
369 ;; we're not pruning all the time
370 (limit (max 0 (- (oref db
:max-hard
)
371 (* (registry-size db
) (oref db
:prune-factor
)))))
372 (candidates (loop for k being the hash-keys of data
374 (list limit candidates
))))
376 (ert-deftest registry-instantiation-test
()
377 (should (registry-db "Testing")))
379 (ert-deftest registry-match-test
()
380 (let ((entry '((hello "goodbye" "bye") (blank))))
382 (message "Testing :regex matching")
383 (should (registry--match :regex entry
'((hello "nye" "bye"))))
384 (should (registry--match :regex entry
'((hello "good"))))
385 (should-not (registry--match :regex entry
'((hello "nye"))))
386 (should-not (registry--match :regex entry
'((hello))))
388 (message "Testing :member matching")
389 (should (registry--match :member entry
'((hello "bye"))))
390 (should (registry--match :member entry
'((hello "goodbye"))))
391 (should-not (registry--match :member entry
'((hello "good"))))
392 (should-not (registry--match :member entry
'((hello "nye"))))
393 (should-not (registry--match :member entry
'((hello)))))
394 (message "Done with matching testing."))
396 (defun registry-make-testable-db (n &optional name file
)
397 (let* ((db (registry-db
399 :file
(or file
"unused")
401 :max-soft
0 ; keep nothing not precious
402 :precious
'(extra more-extra
)
403 :tracked
'(sender subject groups
))))
405 (registry-insert db i
`((sender "me")
406 (subject "about you")
407 (more-extra) ; empty data key should be pruned
408 ;; first 5 entries will NOT have this extra data
409 ,@(when (< 5 i
) (list (list 'extra
"more data")))
410 (groups ,(number-to-string i
)))))
413 (ert-deftest registry-usage-test
()
415 (db (registry-make-testable-db n
)))
416 (message "size %d" n
)
417 (should (= n
(registry-size db
)))
418 (message "max-hard test")
419 (should-error (registry-insert db
"new" '()))
420 (message "Individual lookup")
421 (should (= 58 (caadr (registry-lookup db
'(1 58 99)))))
422 (message "Grouped individual lookup")
423 (should (= 3 (length (registry-lookup db
'(1 58 99)))))
424 (when (boundp 'lexical-binding
)
425 (message "Individual lookup (breaks before lexbind)")
427 (caadr (registry-lookup-breaks-before-lexbind db
'(1 58 99)))))
428 (message "Grouped individual lookup (breaks before lexbind)")
430 (length (registry-lookup-breaks-before-lexbind db
433 (should (= n
(length (registry-search db
:all t
))))
434 (should (= n
(length (registry-search db
:member
'((sender "me"))))))
435 (message "Secondary index search")
436 (should (= n
(length (registry-lookup-secondary-value db
'sender
"me"))))
437 (should (equal '(74) (registry-lookup-secondary-value db
'groups
"74")))
439 (should (registry-delete db
'(1) t
))
441 (message "Search after delete")
442 (should (= n
(length (registry-search db
:all t
))))
443 (message "Secondary search after delete")
444 (should (= n
(length (registry-lookup-secondary-value db
'sender
"me"))))
445 ;; (message "Pruning")
446 ;; (let* ((tokeep (registry-search db :member '((extra "more data"))))
447 ;; (count (- n (length tokeep)))
448 ;; (pruned (registry-prune db))
449 ;; (prune-count (length pruned)))
450 ;; (message "Expecting to prune %d entries and pruned %d"
451 ;; count prune-count)
452 ;; (should (and (= count 5)
453 ;; (= count prune-count))))
454 (message "Done with usage testing.")))
456 (ert-deftest registry-persistence-test
()
458 (tempfile (make-temp-file "registry-persistence-"))
459 (name "persistence tester")
460 (db (registry-make-testable-db n name tempfile
))
462 (message "Saving to %s" tempfile
)
463 (eieio-persistent-save db
)
464 (setq size
(nth 7 (file-attributes tempfile
)))
465 (message "Saved to %s: size %d" tempfile size
)
468 (insert-file-contents-literally tempfile
)
469 (should (looking-at (concat ";; Object "
471 "\n;; EIEIO PERSISTENT OBJECT"))))
472 (message "Reading object back")
473 (setq back
(eieio-persistent-read tempfile
))
475 (message "Read object back: %d keys, expected %d==%d"
476 (registry-size back
) n
(registry-size db
))
477 (should (= (registry-size back
) n
))
478 (should (= (registry-size back
) (registry-size db
)))
479 (delete-file tempfile
))
480 (message "Done with persistence testing."))
483 ;;; registry.el ends here