1 ;;; registry.el --- Track and remember data items by various fields
3 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
5 ;; Author: Teodor Zlatanov <tzz@lifelogs.com>
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
25 ;; This library provides a general-purpose EIEIO-based registry
26 ;; database with persistence, initialized with these fields:
30 ;; max-size: an integer, default most-positive-fixnum
32 ;; prune-factor: a float between 0 and 1, default 0.1
34 ;; precious: a list of symbols
36 ;; tracked: a list of symbols
38 ;; tracker: a hash table tuned for 100 symbols to track (you should
39 ;; only access this with the :lookup2-function and the
40 ;; :lookup2+-function)
42 ;; data: a hash table with default size 10K and resize threshold 2.0
43 ;; (this reflects the expected usage so override it if you know better)
45 ;; ...plus methods to do all the work: `registry-search',
46 ;; `registry-lookup', `registry-lookup-secondary',
47 ;; `registry-lookup-secondary-value', `registry-insert',
48 ;; `registry-delete', `registry-prune', `registry-size' which see
50 ;; and with the following properties:
52 ;; Every piece of data has a unique ID and some general-purpose fields
53 ;; (F1=D1, F2=D2, F3=(a b c)...) expressed as an alist, e.g.
55 ;; ((F1 D1) (F2 D2) (F3 a b c))
57 ;; Note that whether a field has one or many pieces of data, the data
58 ;; is always a list of values.
60 ;; The user decides which fields are "precious", F2 for example. When
61 ;; the registry is pruned, any entries without the F2 field will be
62 ;; removed until the size is :max-size * :prune-factor _less_ than the
63 ;; maximum database size. No entries with the F2 field will be removed
64 ;; at PRUNE TIME, which means it may not be possible to prune back all
65 ;; the way to the target size.
67 ;; When an entry is inserted, the registry will reject new entries if
68 ;; they bring it over the :max-size limit, even if they have the F2
71 ;; The user decides which fields are "tracked", F1 for example. Any
72 ;; new entry is then indexed by all the tracked fields so it can be
73 ;; quickly looked up that way. The data is always a list (see example
74 ;; above) and each list element is indexed.
76 ;; Precious and tracked field names must be symbols. All other
77 ;; fields can be any other Emacs Lisp types.
85 ;; The version number needs to be kept outside of the class definition
86 ;; itself. The persistent-save process does *not* write to file any
87 ;; slot values that are equal to the default :initform value. If a
88 ;; database object is at the most recent version, therefore, its
89 ;; version number will not be written to file. That makes it
90 ;; difficult to know when a database needs to be upgraded.
91 (defvar registry-db-version
0.2
92 "The current version of the registry format.")
94 (defclass registry-db
(eieio-persistent)
95 ((version :initarg
:version
98 :documentation
"The registry version.")
99 (max-size :initarg
:max-size
100 ;; EIEIO's :initform is not 100% compatible with CLOS in
101 ;; that if the form is an atom, it assumes it's constant
102 ;; value rather than an expression, so in order to get the value
103 ;; of `most-positive-fixnum', we need to use an
104 ;; expression that's not just a symbol.
105 :initform
(symbol-value 'most-positive-fixnum
)
108 :documentation
"The maximum number of registry entries.")
110 :initarg
:prune-factor
114 :documentation
"Prune to (:max-size * :prune-factor) less
115 than the :max-size limit. Should be a float between 0 and 1.")
116 (tracked :initarg
:tracked
119 :documentation
"The tracked (indexed) fields, a list of symbols.")
120 (precious :initarg
:precious
123 :documentation
"The precious fields, a list of symbols.")
124 (tracker :initarg
:tracker
126 :documentation
"The field tracking hash table.")
129 :documentation
"The data hash table.")))
131 (cl-defmethod initialize-instance :before
((this registry-db
) slots
)
132 "Check whether a registry object needs to be upgraded."
133 ;; Hardcoded upgrade routines. Version 0.1 to 0.2 requires the
134 ;; :max-soft slot to disappear, and the :max-hard slot to be renamed
136 (let ((current-version
137 (and (plist-member slots
:version
)
138 (plist-get slots
:version
))))
139 (when (or (null current-version
)
140 (eql current-version
0.1))
142 (plist-put slots
:max-size
(plist-get slots
:max-hard
)))
144 (plist-put slots
:version registry-db-version
))
145 (cl-remf slots
:max-hard
)
146 (cl-remf slots
:max-soft
))))
148 (cl-defmethod initialize-instance :after
((this registry-db
) slots
)
149 "Set value of data slot of THIS after initialization."
150 (with-slots (data tracker
) this
151 (unless (member :data slots
)
153 (make-hash-table :size
10000 :rehash-size
2.0 :test
'equal
)))
154 (unless (member :tracker slots
)
155 (setq tracker
(make-hash-table :size
100 :rehash-size
2.0)))))
157 (cl-defmethod registry-lookup ((db registry-db
) keys
)
158 "Search for KEYS in the registry-db THIS.
159 Returns an alist of the key followed by the entry in a list, not a cons cell."
160 (let ((data (oref db data
)))
164 (when (gethash k data
)
165 (list k
(gethash k data
))))
168 (cl-defmethod registry-lookup-breaks-before-lexbind ((db registry-db
) keys
)
169 "Search for KEYS in the registry-db THIS.
170 Returns an alist of the key followed by the entry in a list, not a cons cell."
171 (let ((data (oref db data
)))
173 (cl-loop for key in keys
174 when
(gethash key data
)
175 collect
(list key
(gethash key data
))))))
177 (cl-defmethod registry-lookup-secondary ((db registry-db
) tracksym
179 "Search for TRACKSYM in the registry-db THIS.
180 When CREATE is not nil, create the secondary index hash table if needed."
181 (let ((h (gethash tracksym
(oref db tracker
))))
186 (make-hash-table :size
800 :rehash-size
2.0 :test
'equal
)
188 (gethash tracksym
(oref db tracker
))))))
190 (cl-defmethod registry-lookup-secondary-value ((db registry-db
) tracksym val
192 "Search for TRACKSYM with value VAL in the registry-db THIS.
193 When SET is not nil, set it for VAL (use t for an empty list)."
194 ;; either we're asked for creation or there should be an existing index
195 (when (or set
(registry-lookup-secondary db tracksym
))
196 ;; set the entry if requested,
198 (puthash val
(if (eq t set
) '() set
)
199 (registry-lookup-secondary db tracksym t
)))
200 (gethash val
(registry-lookup-secondary db tracksym
))))
202 (defun registry--match (mode entry check-list
)
205 (let ((key (nth 0 (nth 0 check-list
)))
206 (vals (cdr-safe (nth 0 check-list
)))
208 (while (and key vals
(not found
))
209 (setq found
(cl-case mode
211 (member (car-safe vals
) (cdr-safe (assoc key entry
))))
213 (string-match (car vals
)
216 (cdr-safe (assoc key entry
))
218 vals
(cdr-safe vals
)))
220 (registry--match mode entry
(cdr-safe check-list
))))))
222 (cl-defmethod registry-search ((db registry-db
) &rest spec
)
223 "Search for SPEC across the registry-db THIS.
224 For example calling with `:member \\='(a 1 2)' will match entry \((a 3 1)).
225 Calling with `:all t' (any non-nil value) will match all.
226 Calling with `:regex \\='(a \"h.llo\")' will match entry \(a \"hullo\" \"bye\").
227 The test order is to check :all first, then :member, then :regex."
229 (let ((all (plist-get spec
:all
))
230 (member (plist-get spec
:member
))
231 (regex (plist-get spec
:regex
)))
232 (cl-loop for k being the hash-keys of
(oref db data
)
233 using
(hash-values v
)
235 ;; :all non-nil returns all
238 (and member
(registry--match :member v member
))
240 (and regex
(registry--match :regex v regex
)))
243 (cl-defmethod registry-delete ((db registry-db
) keys assert
&rest spec
)
244 "Delete KEYS from the registry-db THIS.
245 If KEYS is nil, use SPEC to do a search.
246 Updates the secondary ('tracked') indices as well.
247 With assert non-nil, errors out if the key does not exist already."
248 (let* ((data (oref db data
))
250 (apply 'registry-search db spec
)))
251 (tracked (oref db tracked
)))
254 (let ((entry (gethash key data
)))
256 (cl-assert entry nil
"Key %s does not exist in database" key
))
257 ;; clean entry from the secondary indices
259 ;; is this tracked symbol indexed?
260 (when (registry-lookup-secondary db tr
)
261 ;; for every value in the entry under that key...
262 (dolist (val (cdr-safe (assq tr entry
)))
263 (let* ((value-keys (registry-lookup-secondary-value
265 (when (member key value-keys
)
266 ;; override the previous value
267 (registry-lookup-secondary-value
269 ;; with the indexed keys MINUS the current key
270 ;; (we pass t when the list is empty)
271 (or (delete key value-keys
) t
)))))))
275 (cl-defmethod registry-size ((db registry-db
))
276 "Returns the size of the registry-db object THIS.
277 This is the key count of the `data' slot."
278 (hash-table-count (oref db data
)))
280 (cl-defmethod registry-full ((db registry-db
))
281 "Checks if registry-db THIS is full."
282 (>= (registry-size db
)
285 (cl-defmethod registry-insert ((db registry-db
) key entry
)
286 "Insert ENTRY under KEY into the registry-db THIS.
287 Updates the secondary ('tracked') indices as well.
288 Errors out if the key exists already."
289 (cl-assert (not (gethash key
(oref db data
))) nil
290 "Key already exists in database")
291 (cl-assert (not (registry-full db
)) nil
292 "registry max-size limit reached")
295 (puthash key entry
(oref db data
))
297 ;; store the secondary indices
298 (dolist (tr (oref db tracked
))
299 ;; for every value in the entry under that key...
300 (dolist (val (cdr-safe (assq tr entry
)))
301 (let* ((value-keys (registry-lookup-secondary-value db tr val
)))
302 (cl-pushnew key value-keys
:test
'equal
)
303 (registry-lookup-secondary-value db tr val value-keys
))))
306 (cl-defmethod registry-reindex ((db registry-db
))
307 "Rebuild the secondary indices of registry-db THIS."
309 (expected (* (length (oref db tracked
)) (registry-size db
))))
310 (dolist (tr (oref db tracked
))
315 (when (and (< 0 expected
)
316 (= 0 (mod count
1000)))
317 (message "reindexing: %d of %d (%.2f%%)"
318 count expected
(/ (* 100.0 count
) expected
)))
319 (dolist (val (cdr-safe (assq tr v
)))
320 (let* ((value-keys (registry-lookup-secondary-value db tr val
)))
321 (push key value-keys
)
322 (registry-lookup-secondary-value db tr val value-keys
))))
325 (cl-defmethod registry-prune ((db registry-db
) &optional sortfunc
)
326 "Prunes the registry-db object DB.
328 Attempts to prune the number of entries down to \(*
329 :max-size :prune-factor) less than the max-size limit, so
330 pruning doesn't need to happen on every save. Removes only
331 entries without the :precious keys, so it may not be possible to
332 reach the target limit.
334 Entries to be pruned are first sorted using SORTFUNC. Entries
335 from the front of the list are deleted first.
337 Returns the number of deleted entries."
338 (let ((size (registry-size db
))
340 (floor (- (oref db max-size
)
341 (* (oref db max-size
)
342 (oref db prune-factor
)))))
344 (if (registry-full db
)
347 (registry-collect-prune-candidates
348 db
(- size target-size
) sortfunc
))
349 (length (registry-delete db candidates nil
)))
352 (cl-defmethod registry-collect-prune-candidates ((db registry-db
)
354 "Collects pruning candidates from the registry-db object DB.
356 Proposes only entries without the :precious keys, and attempts to
357 return LIMIT such candidates. If SORTFUNC is provided, sort
358 entries first and return candidates from beginning of list."
359 (let* ((precious (oref db precious
))
360 (precious-p (lambda (entry-key)
361 (cdr (memq (car-safe entry-key
) precious
))))
362 (data (oref db data
))
363 (candidates (cl-loop for k being the hash-keys of data
364 using
(hash-values v
)
366 (cl-notany precious-p v
))
367 collect
(cons k v
))))
368 ;; We want the full entries for sorting, but should only return a
369 ;; list of entry keys.
371 (setq candidates
(sort candidates sortfunc
)))
372 (cl-subseq (mapcar #'car candidates
) 0 (min limit
(length candidates
)))))
375 ;;; registry.el ends here