merge trunk
[emacs.git] / lisp / gnus / registry.el
blob24a3aa07ffaa28c4993fe59459c8d592c1e77d1a
1 ;;; registry.el --- Track and remember data items by various fields
3 ;; Copyright (C) 2011-2014 Free Software Foundation, Inc.
5 ;; Author: Teodor Zlatanov <tzz@lifelogs.com>
6 ;; Keywords: data
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23 ;;; Commentary:
25 ;; This library provides a general-purpose EIEIO-based registry
26 ;; database with persistence, initialized with these fields:
28 ;; version: a float, 0.1 currently (don't change it)
30 ;; max-hard: an integer, default 5000000
32 ;; max-soft: an integer, default 50000
34 ;; precious: a list of symbols
36 ;; tracked: a list of symbols
38 ;; tracker: a hashtable tuned for 100 symbols to track (you should
39 ;; only access this with the :lookup2-function and the
40 ;; :lookup2+-function)
42 ;; data: a hashtable with default size 10K and resize threshold 2.0
43 ;; (this reflects the expected usage so override it if you know better)
45 ;; ...plus methods to do all the work: `registry-search',
46 ;; `registry-lookup', `registry-lookup-secondary',
47 ;; `registry-lookup-secondary-value', `registry-insert',
48 ;; `registry-delete', `registry-prune', `registry-size' which see
50 ;; and with the following properties:
52 ;; Every piece of data has a unique ID and some general-purpose fields
53 ;; (F1=D1, F2=D2, F3=(a b c)...) expressed as an alist, e.g.
55 ;; ((F1 D1) (F2 D2) (F3 a b c))
57 ;; Note that whether a field has one or many pieces of data, the data
58 ;; is always a list of values.
60 ;; The user decides which fields are "precious", F2 for example. At
61 ;; PRUNE TIME (when the :prune-function is called), the registry will
62 ;; trim any entries without the F2 field until the size is :max-soft
63 ;; or less. No entries with the F2 field will be removed at PRUNE
64 ;; TIME.
66 ;; When an entry is inserted, the registry will reject new entries
67 ;; if they bring it over the max-hard limit, even if they have the F2
68 ;; field.
70 ;; The user decides which fields are "tracked", F1 for example. Any
71 ;; new entry is then indexed by all the tracked fields so it can be
72 ;; quickly looked up that way. The data is always a list (see example
73 ;; above) and each list element is indexed.
75 ;; Precious and tracked field names must be symbols. All other
76 ;; fields can be any other Emacs Lisp types.
78 ;;; Code:
80 (eval-when-compile (require 'cl))
82 (require 'eieio)
83 (require 'eieio-base)
85 (defclass registry-db (eieio-persistent)
86 ((version :initarg :version
87 :initform 0.1
88 :type float
89 :custom float
90 :documentation "The registry version.")
91 (max-hard :initarg :max-hard
92 :initform 5000000
93 :type integer
94 :custom integer
95 :documentation "Never accept more than this many elements.")
96 (max-soft :initarg :max-soft
97 :initform 50000
98 :type integer
99 :custom integer
100 :documentation "Prune as much as possible to get to this size.")
101 (prune-factor
102 :initarg :prune-factor
103 :initform 0.1
104 :type float
105 :custom float
106 :documentation "At the max-hard limit, prune size * this entries.")
107 (tracked :initarg :tracked
108 :initform nil
109 :type t
110 :documentation "The tracked (indexed) fields, a list of symbols.")
111 (precious :initarg :precious
112 :initform nil
113 :type t
114 :documentation "The precious fields, a list of symbols.")
115 (tracker :initarg :tracker
116 :type hash-table
117 :documentation "The field tracking hashtable.")
118 (data :initarg :data
119 :type hash-table
120 :documentation "The data hashtable.")))
122 (defmethod initialize-instance :AFTER ((this registry-db) slots)
123 "Set value of data slot of THIS after initialization."
124 (with-slots (data tracker) this
125 (unless (member :data slots)
126 (setq data
127 (make-hash-table :size 10000 :rehash-size 2.0 :test 'equal)))
128 (unless (member :tracker slots)
129 (setq tracker (make-hash-table :size 100 :rehash-size 2.0)))))
131 (defmethod registry-lookup ((db registry-db) keys)
132 "Search for KEYS in the registry-db THIS.
133 Returns an alist of the key followed by the entry in a list, not a cons cell."
134 (let ((data (oref db :data)))
135 (delq nil
136 (mapcar
137 (lambda (k)
138 (when (gethash k data)
139 (list k (gethash k data))))
140 keys))))
142 (defmethod registry-lookup-breaks-before-lexbind ((db registry-db) keys)
143 "Search for KEYS in the registry-db THIS.
144 Returns an alist of the key followed by the entry in a list, not a cons cell."
145 (let ((data (oref db :data)))
146 (delq nil
147 (loop for key in keys
148 when (gethash key data)
149 collect (list key (gethash key data))))))
151 (defmethod registry-lookup-secondary ((db registry-db) tracksym
152 &optional create)
153 "Search for TRACKSYM in the registry-db THIS.
154 When CREATE is not nil, create the secondary index hashtable if needed."
155 (let ((h (gethash tracksym (oref db :tracker))))
156 (if h
158 (when create
159 (puthash tracksym
160 (make-hash-table :size 800 :rehash-size 2.0 :test 'equal)
161 (oref db :tracker))
162 (gethash tracksym (oref db :tracker))))))
164 (defmethod registry-lookup-secondary-value ((db registry-db) tracksym val
165 &optional set)
166 "Search for TRACKSYM with value VAL in the registry-db THIS.
167 When SET is not nil, set it for VAL (use t for an empty list)."
168 ;; either we're asked for creation or there should be an existing index
169 (when (or set (registry-lookup-secondary db tracksym))
170 ;; set the entry if requested,
171 (when set
172 (puthash val (if (eq t set) '() set)
173 (registry-lookup-secondary db tracksym t)))
174 (gethash val (registry-lookup-secondary db tracksym))))
176 (defun registry--match (mode entry check-list)
177 ;; for all members
178 (when check-list
179 (let ((key (nth 0 (nth 0 check-list)))
180 (vals (cdr-safe (nth 0 check-list)))
181 found)
182 (while (and key vals (not found))
183 (setq found (case mode
184 (:member
185 (member (car-safe vals) (cdr-safe (assoc key entry))))
186 (:regex
187 (string-match (car vals)
188 (mapconcat
189 'prin1-to-string
190 (cdr-safe (assoc key entry))
191 "\0"))))
192 vals (cdr-safe vals)))
193 (or found
194 (registry--match mode entry (cdr-safe check-list))))))
196 (defmethod registry-search ((db registry-db) &rest spec)
197 "Search for SPEC across the registry-db THIS.
198 For example calling with :member '(a 1 2) will match entry '((a 3 1)).
199 Calling with :all t (any non-nil value) will match all.
200 Calling with :regex '\(a \"h.llo\") will match entry '((a \"hullo\" \"bye\").
201 The test order is to check :all first, then :member, then :regex."
202 (when db
203 (let ((all (plist-get spec :all))
204 (member (plist-get spec :member))
205 (regex (plist-get spec :regex)))
206 (loop for k being the hash-keys of (oref db :data)
207 using (hash-values v)
208 when (or
209 ;; :all non-nil returns all
211 ;; member matching
212 (and member (registry--match :member v member))
213 ;; regex matching
214 (and regex (registry--match :regex v regex)))
215 collect k))))
217 (defmethod registry-delete ((db registry-db) keys assert &rest spec)
218 "Delete KEYS from the registry-db THIS.
219 If KEYS is nil, use SPEC to do a search.
220 Updates the secondary ('tracked') indices as well.
221 With assert non-nil, errors out if the key does not exist already."
222 (let* ((data (oref db :data))
223 (keys (or keys
224 (apply 'registry-search db spec)))
225 (tracked (oref db :tracked)))
227 (dolist (key keys)
228 (let ((entry (gethash key data)))
229 (when assert
230 (assert entry nil
231 "Key %s does not exist in database" key))
232 ;; clean entry from the secondary indices
233 (dolist (tr tracked)
234 ;; is this tracked symbol indexed?
235 (when (registry-lookup-secondary db tr)
236 ;; for every value in the entry under that key...
237 (dolist (val (cdr-safe (assq tr entry)))
238 (let* ((value-keys (registry-lookup-secondary-value
239 db tr val)))
240 (when (member key value-keys)
241 ;; override the previous value
242 (registry-lookup-secondary-value
243 db tr val
244 ;; with the indexed keys MINUS the current key
245 ;; (we pass t when the list is empty)
246 (or (delete key value-keys) t)))))))
247 (remhash key data)))
248 keys))
250 (defmethod registry-size ((db registry-db))
251 "Returns the size of the registry-db object THIS.
252 This is the key count of the :data slot."
253 (hash-table-count (oref db :data)))
255 (defmethod registry-full ((db registry-db))
256 "Checks if registry-db THIS is full."
257 (>= (registry-size db)
258 (oref db :max-hard)))
260 (defmethod registry-insert ((db registry-db) key entry)
261 "Insert ENTRY under KEY into the registry-db THIS.
262 Updates the secondary ('tracked') indices as well.
263 Errors out if the key exists already."
265 (assert (not (gethash key (oref db :data))) nil
266 "Key already exists in database")
268 (assert (not (registry-full db))
270 "registry max-hard size limit reached")
272 ;; store the entry
273 (puthash key entry (oref db :data))
275 ;; store the secondary indices
276 (dolist (tr (oref db :tracked))
277 ;; for every value in the entry under that key...
278 (dolist (val (cdr-safe (assq tr entry)))
279 (let* ((value-keys (registry-lookup-secondary-value db tr val)))
280 (pushnew key value-keys :test 'equal)
281 (registry-lookup-secondary-value db tr val value-keys))))
282 entry)
284 (defmethod registry-reindex ((db registry-db))
285 "Rebuild the secondary indices of registry-db THIS."
286 (let ((count 0)
287 (expected (* (length (oref db :tracked)) (registry-size db))))
288 (dolist (tr (oref db :tracked))
289 (let (values)
290 (maphash
291 (lambda (key v)
292 (incf count)
293 (when (and (< 0 expected)
294 (= 0 (mod count 1000)))
295 (message "reindexing: %d of %d (%.2f%%)"
296 count expected (/ (* 100 count) expected)))
297 (dolist (val (cdr-safe (assq tr v)))
298 (let* ((value-keys (registry-lookup-secondary-value db tr val)))
299 (push key value-keys)
300 (registry-lookup-secondary-value db tr val value-keys))))
301 (oref db :data))))))
303 (defmethod registry-prune ((db registry-db) &optional sortfun)
304 "Prunes the registry-db object THIS.
305 Removes only entries without the :precious keys if it can,
306 then removes oldest entries first.
307 Returns the number of deleted entries.
308 If SORTFUN is given, tries to keep entries that sort *higher*.
309 SORTFUN is passed only the two keys so it must look them up directly."
310 (dolist (collector '(registry-prune-soft-candidates
311 registry-prune-hard-candidates))
312 (let* ((size (registry-size db))
313 (collected (funcall collector db))
314 (limit (nth 0 collected))
315 (candidates (nth 1 collected))
316 ;; sort the candidates if SORTFUN was given
317 (candidates (if sortfun (sort candidates sortfun) candidates))
318 (candidates-count (length candidates))
319 ;; are we over max-soft?
320 (prune-needed (> size limit)))
322 ;; while we have more candidates than we need to remove...
323 (while (and (> candidates-count (- size limit)) candidates)
324 (decf candidates-count)
325 (setq candidates (cdr candidates)))
327 (registry-delete db candidates nil)
328 (length candidates))))
330 (defmethod registry-prune-soft-candidates ((db registry-db))
331 "Collects pruning candidates from the registry-db object THIS.
332 Proposes only entries without the :precious keys."
333 (let* ((precious (oref db :precious))
334 (precious-p (lambda (entry-key)
335 (cdr (memq (car entry-key) precious))))
336 (data (oref db :data))
337 (limit (oref db :max-soft))
338 (candidates (loop for k being the hash-keys of data
339 using (hash-values v)
340 when (notany precious-p v)
341 collect k)))
342 (list limit candidates)))
344 (defmethod registry-prune-hard-candidates ((db registry-db))
345 "Collects pruning candidates from the registry-db object THIS.
346 Proposes any entries over the max-hard limit minus size * prune-factor."
347 (let* ((data (oref db :data))
348 ;; prune to (size * prune-factor) below the max-hard limit so
349 ;; we're not pruning all the time
350 (limit (max 0 (- (oref db :max-hard)
351 (* (registry-size db) (oref db :prune-factor)))))
352 (candidates (loop for k being the hash-keys of data
353 collect k)))
354 (list limit candidates)))
356 (provide 'registry)
357 ;;; registry.el ends here