Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
[linux-2.6/mini2440.git] / fs / afs / vlocation.c
blob849fc3160cb5b372c2626545f926ce185c22762b
/* AFS volume location management
 *
 * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
12 #include <linux/kernel.h>
13 #include <linux/module.h>
14 #include <linux/init.h>
15 #include <linux/sched.h>
16 #include "internal.h"
/* volume location timeout in seconds: how long a dead record stays in the
 * graveyard before the reaper may destroy it */
static unsigned afs_vlocation_timeout = 10;

/* interval in seconds between refreshes of a record from the VL servers */
static unsigned afs_vlocation_update_timeout = 10 * 60;

/*
 * Reaping of unused records: the graveyard list, the lock guarding it and
 * the delayed work item that runs afs_vlocation_reaper().
 */
static void afs_vlocation_reaper(struct work_struct *);
static LIST_HEAD(afs_vlocation_graveyard);
static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);
static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);

/*
 * Periodic updating of live records: the update queue, the lock guarding
 * it, the delayed work item that runs afs_vlocation_updater() and the
 * workqueue that work item is queued on.
 */
static void afs_vlocation_updater(struct work_struct *);
static LIST_HEAD(afs_vlocation_updates);
static DEFINE_SPINLOCK(afs_vlocation_updates_lock);
static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);
static struct workqueue_struct *afs_vlocation_update_worker;
/*
 * Ask the cell's VL servers, one after another, for the volume whose name
 * is stored in vl->vldb.name, starting with the server at the cell's
 * current index.  The cell's vl_sem is held for writing for the duration.
 *
 * Returns 0 as soon as a server has answered into *vldb.  -ENOMEM, -ENONET
 * and -ENOMEDIUM stop the search at once.  -ENETUNREACH, -EHOSTUNREACH and
 * -ECONNREFUSED move on to the next server keeping their value; any other
 * error is turned into -EIO before moving on.  If the cell lists no VL
 * addresses the result is -ENOMEDIUM.
 */
static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
					   struct key *key,
					   struct afs_cache_vlocation *vldb)
{
	struct afs_cell *cell = vl->cell;
	struct in_addr server;
	int remaining;
	int ret = -ENOMEDIUM;

	_enter("%s,%s", cell->name, vl->vldb.name);

	down_write(&vl->cell->vl_sem);

	for (remaining = cell->vl_naddrs; remaining > 0; remaining--) {
		server = cell->vl_addrs[cell->vl_curr_svix];

		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, server.s_addr);

		/* attempt to access the VL server */
		ret = afs_vl_get_entry_by_name(&server, key, vl->vldb.name,
					       vldb, &afs_sync_call);

		/* success and the errors that make further attempts
		 * pointless all end the search here */
		if (ret == 0 ||
		    ret == -ENOMEM ||
		    ret == -ENONET ||
		    ret == -ENOMEDIUM)
			break;

		/* reachability errors are passed through unchanged;
		 * everything else is collapsed into -EIO */
		if (ret != -ENETUNREACH &&
		    ret != -EHOSTUNREACH &&
		    ret != -ECONNREFUSED)
			ret = -EIO;

		/* rotate the server records upon lookup failure */
		cell->vl_curr_svix++;
		cell->vl_curr_svix %= cell->vl_naddrs;
	}

	up_write(&vl->cell->vl_sem);
	_leave(" = %d", ret);
	return ret;
}
87 * iterate through the VL servers in a cell until one of them admits knowing
88 * about the volume in question
90 static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl,
91 struct key *key,
92 afs_volid_t volid,
93 afs_voltype_t voltype,
94 struct afs_cache_vlocation *vldb)
96 struct afs_cell *cell = vl->cell;
97 struct in_addr addr;
98 int count, ret;
100 _enter("%s,%x,%d,", cell->name, volid, voltype);
102 down_write(&vl->cell->vl_sem);
103 ret = -ENOMEDIUM;
104 for (count = cell->vl_naddrs; count > 0; count--) {
105 addr = cell->vl_addrs[cell->vl_curr_svix];
107 _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr);
109 /* attempt to access the VL server */
110 ret = afs_vl_get_entry_by_id(&addr, key, volid, voltype, vldb,
111 &afs_sync_call);
112 switch (ret) {
113 case 0:
114 goto out;
115 case -ENOMEM:
116 case -ENONET:
117 case -ENETUNREACH:
118 case -EHOSTUNREACH:
119 case -ECONNREFUSED:
120 if (ret == -ENOMEM || ret == -ENONET)
121 goto out;
122 goto rotate;
123 case -EBUSY:
124 vl->upd_busy_cnt++;
125 if (vl->upd_busy_cnt <= 3) {
126 if (vl->upd_busy_cnt > 1) {
127 /* second+ BUSY - sleep a little bit */
128 set_current_state(TASK_UNINTERRUPTIBLE);
129 schedule_timeout(1);
130 __set_current_state(TASK_RUNNING);
132 continue;
134 break;
135 case -ENOMEDIUM:
136 vl->upd_rej_cnt++;
137 goto rotate;
138 default:
139 ret = -EIO;
140 goto rotate;
143 /* rotate the server records upon lookup failure */
144 rotate:
145 cell->vl_curr_svix++;
146 cell->vl_curr_svix %= cell->vl_naddrs;
147 vl->upd_busy_cnt = 0;
150 out:
151 if (ret < 0 && vl->upd_rej_cnt > 0) {
152 printk(KERN_NOTICE "kAFS:"
153 " Active volume no longer valid '%s'\n",
154 vl->vldb.name);
155 vl->valid = 0;
156 ret = -ENOMEDIUM;
159 up_write(&vl->cell->vl_sem);
160 _leave(" = %d", ret);
161 return ret;
165 * allocate a volume location record
167 static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell,
168 const char *name,
169 size_t namesz)
171 struct afs_vlocation *vl;
173 vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL);
174 if (vl) {
175 vl->cell = cell;
176 vl->state = AFS_VL_NEW;
177 atomic_set(&vl->usage, 1);
178 INIT_LIST_HEAD(&vl->link);
179 INIT_LIST_HEAD(&vl->grave);
180 INIT_LIST_HEAD(&vl->update);
181 init_waitqueue_head(&vl->waitq);
182 spin_lock_init(&vl->lock);
183 memcpy(vl->vldb.name, name, namesz);
186 _leave(" = %p", vl);
187 return vl;
191 * update record if we found it in the cache
193 static int afs_vlocation_update_record(struct afs_vlocation *vl,
194 struct key *key,
195 struct afs_cache_vlocation *vldb)
197 afs_voltype_t voltype;
198 afs_volid_t vid;
199 int ret;
201 /* try to look up a cached volume in the cell VL databases by ID */
202 _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
203 vl->vldb.name,
204 vl->vldb.vidmask,
205 ntohl(vl->vldb.servers[0].s_addr),
206 vl->vldb.srvtmask[0],
207 ntohl(vl->vldb.servers[1].s_addr),
208 vl->vldb.srvtmask[1],
209 ntohl(vl->vldb.servers[2].s_addr),
210 vl->vldb.srvtmask[2]);
212 _debug("Vids: %08x %08x %08x",
213 vl->vldb.vid[0],
214 vl->vldb.vid[1],
215 vl->vldb.vid[2]);
217 if (vl->vldb.vidmask & AFS_VOL_VTM_RW) {
218 vid = vl->vldb.vid[0];
219 voltype = AFSVL_RWVOL;
220 } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) {
221 vid = vl->vldb.vid[1];
222 voltype = AFSVL_ROVOL;
223 } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) {
224 vid = vl->vldb.vid[2];
225 voltype = AFSVL_BACKVOL;
226 } else {
227 BUG();
228 vid = 0;
229 voltype = 0;
232 /* contact the server to make sure the volume is still available
233 * - TODO: need to handle disconnected operation here
235 ret = afs_vlocation_access_vl_by_id(vl, key, vid, voltype, vldb);
236 switch (ret) {
237 /* net error */
238 default:
239 printk(KERN_WARNING "kAFS:"
240 " failed to update volume '%s' (%x) up in '%s': %d\n",
241 vl->vldb.name, vid, vl->cell->name, ret);
242 _leave(" = %d", ret);
243 return ret;
245 /* pulled from local cache into memory */
246 case 0:
247 _leave(" = 0");
248 return 0;
250 /* uh oh... looks like the volume got deleted */
251 case -ENOMEDIUM:
252 printk(KERN_ERR "kAFS:"
253 " volume '%s' (%x) does not exist '%s'\n",
254 vl->vldb.name, vid, vl->cell->name);
256 /* TODO: make existing record unavailable */
257 _leave(" = %d", ret);
258 return ret;
263 * apply the update to a VL record
265 static void afs_vlocation_apply_update(struct afs_vlocation *vl,
266 struct afs_cache_vlocation *vldb)
268 _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }",
269 vldb->name, vldb->vidmask,
270 ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0],
271 ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1],
272 ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]);
274 _debug("Vids: %08x %08x %08x",
275 vldb->vid[0], vldb->vid[1], vldb->vid[2]);
277 if (strcmp(vldb->name, vl->vldb.name) != 0)
278 printk(KERN_NOTICE "kAFS:"
279 " name of volume '%s' changed to '%s' on server\n",
280 vl->vldb.name, vldb->name);
282 vl->vldb = *vldb;
284 #ifdef AFS_CACHING_SUPPORT
285 /* update volume entry in local cache */
286 cachefs_update_cookie(vl->cache);
287 #endif
291 * fill in a volume location record, consulting the cache and the VL server
292 * both
294 static int afs_vlocation_fill_in_record(struct afs_vlocation *vl,
295 struct key *key)
297 struct afs_cache_vlocation vldb;
298 int ret;
300 _enter("");
302 ASSERTCMP(vl->valid, ==, 0);
304 memset(&vldb, 0, sizeof(vldb));
306 /* see if we have an in-cache copy (will set vl->valid if there is) */
307 #ifdef AFS_CACHING_SUPPORT
308 cachefs_acquire_cookie(cell->cache,
309 &afs_volume_cache_index_def,
310 vlocation,
311 &vl->cache);
312 #endif
314 if (vl->valid) {
315 /* try to update a known volume in the cell VL databases by
316 * ID as the name may have changed */
317 _debug("found in cache");
318 ret = afs_vlocation_update_record(vl, key, &vldb);
319 } else {
320 /* try to look up an unknown volume in the cell VL databases by
321 * name */
322 ret = afs_vlocation_access_vl_by_name(vl, key, &vldb);
323 if (ret < 0) {
324 printk("kAFS: failed to locate '%s' in cell '%s'\n",
325 vl->vldb.name, vl->cell->name);
326 return ret;
330 afs_vlocation_apply_update(vl, &vldb);
331 _leave(" = 0");
332 return 0;
336 * queue a vlocation record for updates
338 static void afs_vlocation_queue_for_updates(struct afs_vlocation *vl)
340 struct afs_vlocation *xvl;
342 /* wait at least 10 minutes before updating... */
343 vl->update_at = get_seconds() + afs_vlocation_update_timeout;
345 spin_lock(&afs_vlocation_updates_lock);
347 if (!list_empty(&afs_vlocation_updates)) {
348 /* ... but wait at least 1 second more than the newest record
349 * already queued so that we don't spam the VL server suddenly
350 * with lots of requests
352 xvl = list_entry(afs_vlocation_updates.prev,
353 struct afs_vlocation, update);
354 if (vl->update_at <= xvl->update_at)
355 vl->update_at = xvl->update_at + 1;
356 } else {
357 queue_delayed_work(afs_vlocation_update_worker,
358 &afs_vlocation_update,
359 afs_vlocation_update_timeout * HZ);
362 list_add_tail(&vl->update, &afs_vlocation_updates);
363 spin_unlock(&afs_vlocation_updates_lock);
367 * lookup volume location
368 * - iterate through the VL servers in a cell until one of them admits knowing
369 * about the volume in question
370 * - lookup in the local cache if not able to find on the VL server
371 * - insert/update in the local cache if did get a VL response
373 struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell,
374 struct key *key,
375 const char *name,
376 size_t namesz)
378 struct afs_vlocation *vl;
379 int ret;
381 _enter("{%s},{%x},%*.*s,%zu",
382 cell->name, key_serial(key),
383 (int) namesz, (int) namesz, name, namesz);
385 if (namesz >= sizeof(vl->vldb.name)) {
386 _leave(" = -ENAMETOOLONG");
387 return ERR_PTR(-ENAMETOOLONG);
390 /* see if we have an in-memory copy first */
391 down_write(&cell->vl_sem);
392 spin_lock(&cell->vl_lock);
393 list_for_each_entry(vl, &cell->vl_list, link) {
394 if (vl->vldb.name[namesz] != '\0')
395 continue;
396 if (memcmp(vl->vldb.name, name, namesz) == 0)
397 goto found_in_memory;
399 spin_unlock(&cell->vl_lock);
401 /* not in the cell's in-memory lists - create a new record */
402 vl = afs_vlocation_alloc(cell, name, namesz);
403 if (!vl) {
404 up_write(&cell->vl_sem);
405 return ERR_PTR(-ENOMEM);
408 afs_get_cell(cell);
410 list_add_tail(&vl->link, &cell->vl_list);
411 vl->state = AFS_VL_CREATING;
412 up_write(&cell->vl_sem);
414 fill_in_record:
415 ret = afs_vlocation_fill_in_record(vl, key);
416 if (ret < 0)
417 goto error_abandon;
418 spin_lock(&vl->lock);
419 vl->state = AFS_VL_VALID;
420 spin_unlock(&vl->lock);
421 wake_up(&vl->waitq);
423 /* schedule for regular updates */
424 afs_vlocation_queue_for_updates(vl);
425 goto success;
427 found_in_memory:
428 /* found in memory */
429 _debug("found in memory");
430 atomic_inc(&vl->usage);
431 spin_unlock(&cell->vl_lock);
432 if (!list_empty(&vl->grave)) {
433 spin_lock(&afs_vlocation_graveyard_lock);
434 list_del_init(&vl->grave);
435 spin_unlock(&afs_vlocation_graveyard_lock);
437 up_write(&cell->vl_sem);
439 /* see if it was an abandoned record that we might try filling in */
440 spin_lock(&vl->lock);
441 while (vl->state != AFS_VL_VALID) {
442 afs_vlocation_state_t state = vl->state;
444 _debug("invalid [state %d]", state);
446 if (state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME) {
447 vl->state = AFS_VL_CREATING;
448 spin_unlock(&vl->lock);
449 goto fill_in_record;
452 /* must now wait for creation or update by someone else to
453 * complete */
454 _debug("wait");
456 spin_unlock(&vl->lock);
457 ret = wait_event_interruptible(vl->waitq,
458 vl->state == AFS_VL_NEW ||
459 vl->state == AFS_VL_VALID ||
460 vl->state == AFS_VL_NO_VOLUME);
461 if (ret < 0)
462 goto error;
463 spin_lock(&vl->lock);
465 spin_unlock(&vl->lock);
467 success:
468 _leave(" = %p",vl);
469 return vl;
471 error_abandon:
472 spin_lock(&vl->lock);
473 vl->state = AFS_VL_NEW;
474 spin_unlock(&vl->lock);
475 wake_up(&vl->waitq);
476 error:
477 ASSERT(vl != NULL);
478 afs_put_vlocation(vl);
479 _leave(" = %d", ret);
480 return ERR_PTR(ret);
484 * finish using a volume location record
486 void afs_put_vlocation(struct afs_vlocation *vl)
488 if (!vl)
489 return;
491 _enter("%s", vl->vldb.name);
493 ASSERTCMP(atomic_read(&vl->usage), >, 0);
495 if (likely(!atomic_dec_and_test(&vl->usage))) {
496 _leave("");
497 return;
500 spin_lock(&afs_vlocation_graveyard_lock);
501 if (atomic_read(&vl->usage) == 0) {
502 _debug("buried");
503 list_move_tail(&vl->grave, &afs_vlocation_graveyard);
504 vl->time_of_death = get_seconds();
505 schedule_delayed_work(&afs_vlocation_reap,
506 afs_vlocation_timeout * HZ);
508 /* suspend updates on this record */
509 if (!list_empty(&vl->update)) {
510 spin_lock(&afs_vlocation_updates_lock);
511 list_del_init(&vl->update);
512 spin_unlock(&afs_vlocation_updates_lock);
515 spin_unlock(&afs_vlocation_graveyard_lock);
516 _leave(" [killed?]");
520 * destroy a dead volume location record
522 static void afs_vlocation_destroy(struct afs_vlocation *vl)
524 _enter("%p", vl);
526 #ifdef AFS_CACHING_SUPPORT
527 cachefs_relinquish_cookie(vl->cache, 0);
528 #endif
530 afs_put_cell(vl->cell);
531 kfree(vl);
535 * reap dead volume location records
537 static void afs_vlocation_reaper(struct work_struct *work)
539 LIST_HEAD(corpses);
540 struct afs_vlocation *vl;
541 unsigned long delay, expiry;
542 time_t now;
544 _enter("");
546 now = get_seconds();
547 spin_lock(&afs_vlocation_graveyard_lock);
549 while (!list_empty(&afs_vlocation_graveyard)) {
550 vl = list_entry(afs_vlocation_graveyard.next,
551 struct afs_vlocation, grave);
553 _debug("check %p", vl);
555 /* the queue is ordered most dead first */
556 expiry = vl->time_of_death + afs_vlocation_timeout;
557 if (expiry > now) {
558 delay = (expiry - now) * HZ;
559 _debug("delay %lu", delay);
560 if (!schedule_delayed_work(&afs_vlocation_reap,
561 delay)) {
562 cancel_delayed_work(&afs_vlocation_reap);
563 schedule_delayed_work(&afs_vlocation_reap,
564 delay);
566 break;
569 spin_lock(&vl->cell->vl_lock);
570 if (atomic_read(&vl->usage) > 0) {
571 _debug("no reap");
572 list_del_init(&vl->grave);
573 } else {
574 _debug("reap");
575 list_move_tail(&vl->grave, &corpses);
576 list_del_init(&vl->link);
578 spin_unlock(&vl->cell->vl_lock);
581 spin_unlock(&afs_vlocation_graveyard_lock);
583 /* now reap the corpses we've extracted */
584 while (!list_empty(&corpses)) {
585 vl = list_entry(corpses.next, struct afs_vlocation, grave);
586 list_del(&vl->grave);
587 afs_vlocation_destroy(vl);
590 _leave("");
594 * initialise the VL update process
596 int __init afs_vlocation_update_init(void)
598 afs_vlocation_update_worker =
599 create_singlethread_workqueue("kafs_vlupdated");
600 return afs_vlocation_update_worker ? 0 : -ENOMEM;
604 * discard all the volume location records for rmmod
606 void afs_vlocation_purge(void)
608 afs_vlocation_timeout = 0;
610 spin_lock(&afs_vlocation_updates_lock);
611 list_del_init(&afs_vlocation_updates);
612 spin_unlock(&afs_vlocation_updates_lock);
613 cancel_delayed_work(&afs_vlocation_update);
614 queue_delayed_work(afs_vlocation_update_worker,
615 &afs_vlocation_update, 0);
616 destroy_workqueue(afs_vlocation_update_worker);
618 cancel_delayed_work(&afs_vlocation_reap);
619 schedule_delayed_work(&afs_vlocation_reap, 0);
623 * update a volume location
625 static void afs_vlocation_updater(struct work_struct *work)
627 struct afs_cache_vlocation vldb;
628 struct afs_vlocation *vl, *xvl;
629 time_t now;
630 long timeout;
631 int ret;
633 _enter("");
635 now = get_seconds();
637 /* find a record to update */
638 spin_lock(&afs_vlocation_updates_lock);
639 for (;;) {
640 if (list_empty(&afs_vlocation_updates)) {
641 spin_unlock(&afs_vlocation_updates_lock);
642 _leave(" [nothing]");
643 return;
646 vl = list_entry(afs_vlocation_updates.next,
647 struct afs_vlocation, update);
648 if (atomic_read(&vl->usage) > 0)
649 break;
650 list_del_init(&vl->update);
653 timeout = vl->update_at - now;
654 if (timeout > 0) {
655 queue_delayed_work(afs_vlocation_update_worker,
656 &afs_vlocation_update, timeout * HZ);
657 spin_unlock(&afs_vlocation_updates_lock);
658 _leave(" [nothing]");
659 return;
662 list_del_init(&vl->update);
663 atomic_inc(&vl->usage);
664 spin_unlock(&afs_vlocation_updates_lock);
666 /* we can now perform the update */
667 _debug("update %s", vl->vldb.name);
668 vl->state = AFS_VL_UPDATING;
669 vl->upd_rej_cnt = 0;
670 vl->upd_busy_cnt = 0;
672 ret = afs_vlocation_update_record(vl, NULL, &vldb);
673 spin_lock(&vl->lock);
674 switch (ret) {
675 case 0:
676 afs_vlocation_apply_update(vl, &vldb);
677 vl->state = AFS_VL_VALID;
678 break;
679 case -ENOMEDIUM:
680 vl->state = AFS_VL_VOLUME_DELETED;
681 break;
682 default:
683 vl->state = AFS_VL_UNCERTAIN;
684 break;
686 spin_unlock(&vl->lock);
687 wake_up(&vl->waitq);
689 /* and then reschedule */
690 _debug("reschedule");
691 vl->update_at = get_seconds() + afs_vlocation_update_timeout;
693 spin_lock(&afs_vlocation_updates_lock);
695 if (!list_empty(&afs_vlocation_updates)) {
696 /* next update in 10 minutes, but wait at least 1 second more
697 * than the newest record already queued so that we don't spam
698 * the VL server suddenly with lots of requests
700 xvl = list_entry(afs_vlocation_updates.prev,
701 struct afs_vlocation, update);
702 if (vl->update_at <= xvl->update_at)
703 vl->update_at = xvl->update_at + 1;
704 xvl = list_entry(afs_vlocation_updates.next,
705 struct afs_vlocation, update);
706 timeout = xvl->update_at - now;
707 if (timeout < 0)
708 timeout = 0;
709 } else {
710 timeout = afs_vlocation_update_timeout;
713 ASSERT(list_empty(&vl->update));
715 list_add_tail(&vl->update, &afs_vlocation_updates);
717 _debug("timeout %ld", timeout);
718 queue_delayed_work(afs_vlocation_update_worker,
719 &afs_vlocation_update, timeout * HZ);
720 spin_unlock(&afs_vlocation_updates_lock);
721 afs_put_vlocation(vl);