/* Copyright (c) 1998, 1999, 2003-2005, 2006 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License version 2 as
   published by the Free Software Foundation.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
#include <assert.h>
#include <atomic.h>
#include <errno.h>
#include <error.h>
#include <inttypes.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <libintl.h>
#include <arpa/inet.h>
#include <rpcsvc/nis.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/uio.h>

#include "nscd.h"
#include "dbg_log.h"
/* Number of times a value is reloaded without being used.  UINT_MAX
   means unlimited.  */
unsigned int reload_count = DEFAULT_RELOAD_LIMIT;
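/* Note (added): DEFAULT_RELOAD_LIMIT is defined in nscd.h; the limit can be
   changed at startup via the "reload-count" setting in nscd.conf, which is
   parsed elsewhere in nscd.  */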
/* Search the cache for a matching entry and return it when found.  If
   this fails search the negative cache and return (void *) -1 if this
   search was successful.  Otherwise return NULL.

   This function must be called with the read-lock held.  */
struct datahead *
cache_search (request_type type, void *key, size_t len,
              struct database_dyn *table, uid_t owner)
{
  unsigned long int hash = __nis_hash (key, len) % table->head->module;

  unsigned long int nsearched = 0;
  struct datahead *result = NULL;

  ref_t work = table->head->array[hash];
  while (work != ENDREF)
    {
      ++nsearched;

      struct hashentry *here = (struct hashentry *) (table->data + work);

      if (type == here->type && len == here->len
          && memcmp (key, table->data + here->key, len) == 0
          && here->owner == owner)
        {
          /* We found the entry.  Increment the appropriate counter.  */
          struct datahead *dh
            = (struct datahead *) (table->data + here->packet);

          /* See whether we must ignore the entry.  */
          if (dh->usable)
            {
              /* We do not synchronize the memory here.  The statistics
                 data is not crucial, we synchronize only once in a while
                 in the cleanup threads.  */
              if (dh->notfound)
                ++table->head->neghit;
              else
                {
                  ++table->head->poshit;

                  if (dh->nreloads != 0)
                    dh->nreloads = 0;
                }

              result = dh;
              break;
            }
        }

      work = here->next;
    }

  if (nsearched > table->head->maxnsearched)
    table->head->maxnsearched = nsearched;

  return result;
}
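/* Illustrative sketch (added; not part of the original source): callers in
   the nscd worker threads are expected to hold the table's read lock around
   the lookup, roughly along these lines.  The key, keylen, and uid variables
   here are hypothetical.

     pthread_rwlock_rdlock (&table->lock);
     struct datahead *dh = cache_search (GETPWBYNAME, key, keylen,
                                         table, uid);
     if (dh != NULL)
       ...send the cached packet to the client...
     pthread_rwlock_unlock (&table->lock);
*/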
/* Add a new entry to the cache.  The return value is zero if the function
   call was successful.

   This function must be called with the read-lock held.

   We modify the table but we nevertheless only acquire a read-lock.
   This is ok since we use operations which would be safe even without
   locking, given that the `prune_cache' function is not running at the
   same time.  Using the read-lock reduces the chance of conflicts.  */
int
cache_add (int type, const void *key, size_t len, struct datahead *packet,
           bool first, struct database_dyn *table,
           uid_t owner)
{
  if (__builtin_expect (debug_level >= 2, 0))
    {
      const char *str;
      char buf[INET6_ADDRSTRLEN + 1];
      if (type == GETHOSTBYADDR || type == GETHOSTBYADDRv6)
        str = inet_ntop (type == GETHOSTBYADDR ? AF_INET : AF_INET6,
                         key, buf, sizeof (buf));
      else
        str = key;

      dbg_log (_("add new entry \"%s\" of type %s for %s to cache%s"),
               str, serv2str[type], dbnames[table - dbs],
               first ? " (first)" : "");
    }

  unsigned long int hash = __nis_hash (key, len) % table->head->module;
  struct hashentry *newp;

  newp = mempool_alloc (table, sizeof (struct hashentry));
  /* If we cannot allocate memory, just do not do anything.  */
  if (newp == NULL)
    return -1;

  newp->type = type;
  newp->first = first;
  newp->len = len;
  newp->key = (char *) key - table->data;
  assert (newp->key + newp->len <= table->head->first_free);
  newp->owner = owner;
  newp->packet = (char *) packet - table->data;

  /* Put the new entry in the first position.  */
  do
    newp->next = table->head->array[hash];
  while (atomic_compare_and_exchange_bool_acq (&table->head->array[hash],
                                               (ref_t) ((char *) newp
                                                        - table->data),
                                               (ref_t) newp->next));
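  /* Note (added): the do/while loop above is a lock-free push onto the head
     of the hash chain.  newp->next is re-read and the compare-and-exchange
     retried until no other thread has changed array[hash] in the meantime,
     which is why cache_add only needs the read lock.  */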
  /* Update the statistics.  */
  if (packet->notfound)
    ++table->head->negmiss;
  else if (first)
    ++table->head->posmiss;

  /* We depend on this value being correct and at least as high as the
     real number of entries.  */
  atomic_increment (&table->head->nentries);

  /* It does not matter that we are not loading the just-incremented
     value, this is just for statistics.  */
  unsigned long int nentries = table->head->nentries;
  if (nentries > table->head->maxnentries)
    table->head->maxnentries = nentries;

  if (table->persistent)
    // XXX async OK?
    msync ((void *) table->head,
           (char *) &table->head->array[hash] - (char *) table->head
           + sizeof (ref_t), MS_ASYNC);
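  /* Note (added): the msync call above covers only the region from the start
     of the database header through the bucket slot that was just updated,
     not the newly added entry's payload.  */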
  return 0;
}
/* Walk through the table and remove all entries whose lifetime has ended.

   We have a problem here.  To actually remove the entries we must get
   the write-lock.  But since we want to keep the time we have the
   lock as short as possible we cannot simply acquire the lock when we
   start looking for timed-out entries.

   Therefore we do it in two stages: first we look for entries which
   must be invalidated and remember them.  Then we get the lock and
   actually remove them.  This is complicated by the way we have to
   free the data structures since some hash table entries share the same
   data.  */
void
prune_cache (struct database_dyn *table, time_t now, int fd)
{
  size_t cnt = table->head->module;

  /* If this table is not actually used don't do anything.  */
  if (cnt == 0)
    {
      if (fd != -1)
        {
          /* Reply to the INVALIDATE initiator.  */
          int32_t resp = 0;
          writeall (fd, &resp, sizeof (resp));
        }
      return;
    }
  /* This function can be called from the cleanup thread but also in
     response to an invalidate command.  Make sure only one thread is
     running.  When not serving an INVALIDATE request, there is no need
     for the second thread to wait around.  */
  if (fd == -1)
    {
      if (pthread_mutex_trylock (&table->prunelock) != 0)
        /* The work is already being done.  */
        return;
    }
  else
    pthread_mutex_lock (&table->prunelock);
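  /* Note (added): fd == -1 means this is the periodic call from the cleanup
     thread, which simply gives up if another prune is already running.  A
     call made on behalf of an INVALIDATE request (fd >= 0) blocks on the
     mutex instead, so the invalidation is carried out rather than skipped.  */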
  /* If we check for the modification of the underlying file we invalidate
     the entries also in this case.  */
  if (table->check_file)
    {
      struct stat64 st;

      if (stat64 (table->filename, &st) < 0)
        {
          char buf[128];
          /* We cannot stat() the file, disable file checking if the
             file does not exist.  */
          dbg_log (_("cannot stat() file `%s': %s"),
                   table->filename, strerror_r (errno, buf, sizeof (buf)));
          if (errno == ENOENT)
            table->check_file = 0;
        }
      else
        {
          if (st.st_mtime != table->file_mtime)
            {
              /* The file changed.  Invalidate all entries.  */
              now = LONG_MAX;
              table->file_mtime = st.st_mtime;
            }
        }
    }
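  /* Note (added): setting now to LONG_MAX above makes every entry appear
     timed out in the scan below and also triggers the explicit
     "now == LONG_MAX" discard case, so a changed backing file invalidates
     the whole table.  */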
  /* We run through the table and find values which are not valid anymore.

     Note that for the initial step, finding the entries to be removed,
     we don't need to get any lock.  It is at all times assured that the
     linked lists are set up correctly and that no second thread prunes
     the cache.  */
  bool mark[cnt];
  size_t first = cnt + 1;
  size_t last = 0;
  char *const data = table->data;
  bool any = false;

  if (__builtin_expect (debug_level > 2, 0))
    dbg_log (_("pruning %s cache; time %ld"),
             dbnames[table - dbs], (long int) now);
  do
    {
      ref_t run = table->head->array[--cnt];

      while (run != ENDREF)
        {
          struct hashentry *runp = (struct hashentry *) (data + run);
          struct datahead *dh = (struct datahead *) (data + runp->packet);

          /* Some debug support.  */
          if (__builtin_expect (debug_level > 2, 0))
            {
              char buf[INET6_ADDRSTRLEN];
              const char *str;

              if (runp->type == GETHOSTBYADDR || runp->type == GETHOSTBYADDRv6)
                {
                  inet_ntop (runp->type == GETHOSTBYADDR ? AF_INET : AF_INET6,
                             data + runp->key, buf, sizeof (buf));
                  str = buf;
                }
              else
                str = data + runp->key;

              dbg_log (_("considering %s entry \"%s\", timeout %" PRIu64),
                       serv2str[runp->type], str, dh->timeout);
            }

          /* Check whether the entry timed out.  */
          if (dh->timeout < now)
            {
              /* This hash bucket could contain entries which need to
                 be looked at.  */
              mark[cnt] = true;

              first = MIN (first, cnt);
              last = MAX (last, cnt);

              /* We only have to look at the data of the first entries
                 since the count information is kept in the data part
                 which is shared.  */
              if (runp->first)
                {
                  /* At this point there are two choices: we reload the
                     value or we discard it.  Do not change NRELOADS if
                     we do not reload the record.  */
                  if ((reload_count != UINT_MAX
                       && __builtin_expect (dh->nreloads >= reload_count, 0))
                      /* We always remove negative entries.  */
                      || dh->notfound
                      /* Discard everything if the user explicitly
                         requests it.  */
                      || now == LONG_MAX)
                    {
                      /* Remove the value.  */
                      dh->usable = false;

                      /* We definitely have some garbage entries now.  */
                      any = true;
                    }
                  else
                    {
                      /* Reload the value.  We do this only for the
                         initially used key, not the additionally
                         added derived value.  */
                      switch (runp->type)
                        {
                        case GETPWBYNAME:
                          readdpwbyname (table, runp, dh);
                          break;

                        case GETPWBYUID:
                          readdpwbyuid (table, runp, dh);
                          break;

                        case GETGRBYNAME:
                          readdgrbyname (table, runp, dh);
                          break;

                        case GETGRBYGID:
                          readdgrbygid (table, runp, dh);
                          break;

                        case GETHOSTBYNAME:
                          readdhstbyname (table, runp, dh);
                          break;

                        case GETHOSTBYNAMEv6:
                          readdhstbynamev6 (table, runp, dh);
                          break;

                        case GETHOSTBYADDR:
                          readdhstbyaddr (table, runp, dh);
                          break;

                        case GETHOSTBYADDRv6:
                          readdhstbyaddrv6 (table, runp, dh);
                          break;

                        case GETAI:
                          readdhstai (table, runp, dh);
                          break;

                        case INITGROUPS:
                          readdinitgroups (table, runp, dh);
                          break;

                        default:
                          assert (! "should never happen");
                        }

                      /* If the entry has been replaced, we might need
                         cleanup.  */
                      any |= !dh->usable;
                    }
                }
            }
          else
            assert (dh->usable);

          run = runp->next;
        }
    }
  while (cnt > 0);
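  /* Note (added): after this scan, FIRST and LAST bracket the range of
     bucket indices that contained timed-out entries, and MARK records which
     of those buckets need to be rewritten under the write lock below.  */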
  if (fd != -1)
    {
      /* Reply to the INVALIDATE initiator that the cache has been
         invalidated.  */
      int32_t resp = 0;
      writeall (fd, &resp, sizeof (resp));
    }
  if (first <= last)
    {
      struct hashentry *head = NULL;

      /* Now we have to get the write lock since we are about to modify
         the table.  */
      if (__builtin_expect (pthread_rwlock_trywrlock (&table->lock) != 0, 0))
        {
          ++table->head->wrlockdelayed;
          pthread_rwlock_wrlock (&table->lock);
        }

      while (first <= last)
        {
          if (mark[first])
            {
              ref_t *old = &table->head->array[first];
              ref_t run = table->head->array[first];

              while (run != ENDREF)
                {
                  struct hashentry *runp = (struct hashentry *) (data + run);
                  struct datahead *dh
                    = (struct datahead *) (data + runp->packet);

                  if (! dh->usable)
                    {
                      /* We need the list only for debugging but it is
                         more costly to avoid creating the list than
                         doing it.  */
                      runp->dellist = head;
                      head = runp;

                      /* No need for an atomic operation, we have the
                         write lock.  */
                      --table->head->nentries;

                      run = *old = runp->next;
                    }
                  else
                    {
                      old = &runp->next;
                      run = runp->next;
                    }
                }
            }

          ++first;
        }

      /* It's all done.  */
      pthread_rwlock_unlock (&table->lock);
      /* Make sure the data is saved to disk.  */
      if (table->persistent)
        msync (table->head,
               data + table->head->first_free - (char *) table->head,
               MS_ASYNC);
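      /* Note (added): unlike the msync in cache_add, this write-back covers
         everything from the database header up to first_free, i.e. the
         whole used part of the persistent mapping.  */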
      /* One extra pass if we do debugging.  */
      if (__builtin_expect (debug_level > 0, 0))
        {
          struct hashentry *runp = head;

          while (runp != NULL)
            {
              char buf[INET6_ADDRSTRLEN];
              const char *str;

              if (runp->type == GETHOSTBYADDR || runp->type == GETHOSTBYADDRv6)
                {
                  inet_ntop (runp->type == GETHOSTBYADDR ? AF_INET : AF_INET6,
                             data + runp->key, buf, sizeof (buf));
                  str = buf;
                }
              else
                str = data + runp->key;

              dbg_log ("remove %s entry \"%s\"", serv2str[runp->type], str);

              runp = runp->dellist;
            }
        }
    }
  /* Run garbage collection if any entry has been removed or replaced.  */
  if (any)
    gc (table);

  pthread_mutex_unlock (&table->prunelock);
}
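/* Illustrative sketch (added; not part of the original source): prune_cache
   has two kinds of callers in nscd.  The per-database cleanup thread invokes
   it periodically without a client descriptor, while the INVALIDATE request
   handler passes the client's descriptor so the reply above can be sent,
   roughly:

     prune_cache (table, time (NULL), -1);       -- periodic cleanup
     prune_cache (table, LONG_MAX, client_fd);   -- explicit invalidation

   The client_fd variable is hypothetical; the exact call sites live
   elsewhere in nscd.  */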