s3: Add an async smbsock_connect
[Samba.git] / source3 / smbd / oplock_onefs.c
blobd359f9c6f201638956cf6545aa99532e2ce3965c
1 /*
2 * Unix SMB/CIFS implementation.
3 * Support for OneFS kernel oplocks
5 * Copyright (C) Volker Lendecke 2007
6 * Copyright (C) Tim Prouty, 2009
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 3 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #define DBGC_CLASS DBGC_LOCKING
24 #include "includes.h"
26 #if HAVE_ONEFS
27 #include "oplock_onefs.h"
28 #include "smbd/globals.h"
30 #include <ifs/ifs_syscalls.h>
31 #include <isi_ecs/isi_ecs_oplocks.h>
32 #include <sys/proc.h>
/**
 * Private state of the OneFS kernel oplock backend.
 */
struct onefs_oplocks_context {
	/* Generic kernel oplocks context this private state is attached to. */
	struct kernel_oplocks *ctx;
	/* Handler table passed to oplocks_event_dispatcher(). */
	const struct oplocks_event_ops *onefs_ops;
	/* Descriptor returned by oplocks_event_register(). */
	int onefs_event_fd;
	/* Read event registered for onefs_event_fd. */
	struct fd_event *read_fde;
};
/**
 * State of a callback record; selects which member of the record's data
 * union is meaningful.
 */
enum onefs_callback_state {
	ONEFS_OPEN_FILE,		/* record refers to an open fsp */
	ONEFS_WAITING_FOR_OPLOCK	/* record refers to a pending mid */
};
46 struct onefs_callback_record {
47 struct onefs_callback_record *prev, *next;
48 uint64_t id;
49 enum onefs_callback_state state;
50 union {
51 files_struct *fsp; /* ONEFS_OPEN_FILE */
52 uint16_t mid; /* ONEFS_WAITING_FOR_OPLOCK */
53 } data;
/**
 * Internal list of files (along with additional state) that have outstanding
 * oplocks or requests for oplocks.
 */
struct onefs_callback_record *callback_recs;
62 /**
63 * Convert a onefs_callback_record to a string.
65 static char *onefs_callback_record_str_static(const struct onefs_callback_record *r)
67 static fstring result;
69 if (r == NULL) {
70 fstrcpy(result, "NULL callback record");
71 return result;
74 switch (r->state) {
75 case ONEFS_OPEN_FILE:
76 fstr_sprintf(result, "cb record %llu for file %s",
77 r->id, r->data.fsp->fsp_name);
78 break;
79 case ONEFS_WAITING_FOR_OPLOCK:
80 fstr_sprintf(result, "cb record %llu for pending mid %d",
81 r->id, (int)r->data.mid);
82 break;
83 default:
84 fstr_sprintf(result, "cb record %llu unknown state %d",
85 r->id, r->state);
86 break;
89 return result;
92 /**
93 * Traverse the list of onefs_callback_records and print all entries.
95 static void debug_cb_records(const char *fn)
97 struct onefs_callback_record *rec;
99 if (DEBUGLEVEL < 10)
100 return;
102 DEBUG(10, ("cb records (%s):\n", fn));
104 for (rec = callback_recs; rec; rec = rec->next) {
105 DEBUGADD(10, ("%s\n", onefs_callback_record_str_static(rec)));
110 * Find a callback record in the list of outstanding oplock operations.
112 * Once n ifs_createfile requests an oplock on a file, the kernel communicates
113 * with samba via the oplock event channel by sending events that reference an
114 * id. This function maps that id to the onefs_callback_record that was
115 * created for it during the initial setup on open (onefs_oplock_wait_record).
116 * When a matching id is found in the onefs_callback_record list, the
117 * callback_type is checked to make sure the record is in in the correct
118 * state.
120 static struct onefs_callback_record *onefs_find_cb(uint64_t id,
121 enum onefs_callback_state expected_state)
123 struct onefs_callback_record *rec;
125 debug_cb_records("onefs_find_cb");
127 for (rec = callback_recs; rec; rec = rec->next) {
128 if (rec->id == id) {
129 DEBUG(10, ("found %s\n",
130 onefs_callback_record_str_static(rec)));
131 break;
135 if (rec == NULL) {
136 DEBUG(5, ("Could not find callback record for id %llu\n", id));
137 return NULL;
140 if (rec->state != expected_state) {
141 DEBUG(0, ("Expected cb type %d, got %s", expected_state,
142 onefs_callback_record_str_static(rec)));
143 SMB_ASSERT(0);
144 return NULL;
147 return rec;
151 * Remove and free a callback record from the callback record list.
153 void destroy_onefs_callback_record(uint64_t id)
155 struct onefs_callback_record *rec;
157 debug_cb_records("destroy_onefs_callback_record");
159 if (id == 0) {
160 DEBUG(10, ("destroy_onefs_callback_record: Nothing to "
161 "destroy\n"));
162 return;
165 for (rec = callback_recs; rec; rec = rec->next) {
166 if (rec->id == id) {
167 DLIST_REMOVE(callback_recs, rec);
168 SAFE_FREE(rec);
169 DEBUG(10, ("removed cb rec %llu\n", id));
170 return;
174 DEBUG(0, ("Could not find cb rec %llu to delete", id));
175 SMB_ASSERT(0);
179 * Initialize a callback record and add it to the list of outstanding callback
180 * records.
182 * This is called in the open path before ifs_createfile so an id can be
183 * passed in. Each callback record can be in one of two states:
185 * 1. WAITING_FOR_OPLOCK: This is the initial state for all callback
186 * records. If ifs_createfile can be completed syncronously without needing
187 * to break any level I oplocks, the state is transitioned to OPEN_FILE.
188 * Otherwise ifs_createfile will finish asynchronously and the open is
189 * deferred. When the necessary level I opocks have been broken, and the
190 * open can be done, an event is sent by the kernel on the oplock event
191 * channel, which is handled by semlock_available_handler. At this point
192 * the deferred open is retried. Unless a level I oplock was acquired by
193 * another client, ifs_createfile will now complete synchronously.
195 * 2. OPEN_FILE: Once ifs_createfile completes, the callback record is
196 * transitioned to this state via onefs_set_oplock_callback.
198 uint64_t onefs_oplock_wait_record(uint16_t mid)
200 struct onefs_callback_record *result;
201 static uint64_t id_generator = 0;
203 if (!(result = SMB_MALLOC_P(struct onefs_callback_record))) {
204 DEBUG(0, ("talloc failed\n"));
205 return 0;
208 memset(result, '\0', sizeof(result));
210 id_generator += 1;
211 if (id_generator == 0) {
212 /* Wow, that's a long-running smbd... */
213 id_generator += 1;
216 result->id = id_generator;
218 result->state = ONEFS_WAITING_FOR_OPLOCK;
219 result->data.mid = mid;
220 DLIST_ADD(callback_recs, result);
222 DEBUG(10, ("New cb rec %llu created\n", result->id));
224 return result->id;
228 * Transition the callback record state to OPEN_FILE.
230 * This is called after the file is opened and an fsp struct has been
231 * allocated. The mid is dropped in favor of storing the fsp.
233 void onefs_set_oplock_callback(uint64_t id, files_struct *fsp)
235 struct onefs_callback_record *cb;
236 char *msg;
238 DEBUG(10, ("onefs_set_oplock_callback called for cb rec %llu\n", id));
240 if (!(cb = onefs_find_cb(id, ONEFS_WAITING_FOR_OPLOCK))) {
241 if (asprintf(&msg, "Got invalid callback %lld\n", id) != -1) {
242 smb_panic(msg);
244 smb_panic("Got invalid callback id\n");
248 * Paranoia check
250 if (open_was_deferred(cb->data.mid)) {
251 if (asprintf(&msg, "Trying to upgrade callback for deferred "
252 "open mid=%d\n", cb->data.mid) != -1) {
253 smb_panic(msg);
255 smb_panic("Trying to upgrade callback for deferred open "
256 "mid\n");
259 cb->state = ONEFS_OPEN_FILE;
260 cb->data.fsp = fsp;
264 * Using a callback record, initialize a share mode entry to pass to
265 * share_mode_entry_to_message to send samba IPC messages.
267 static void init_share_mode_entry(struct share_mode_entry *sme,
268 struct onefs_callback_record *cb,
269 int op_type)
271 ZERO_STRUCT(*sme);
273 sme->pid = procid_self();
274 sme->op_type = op_type;
275 sme->id = cb->data.fsp->file_id;
276 sme->share_file_id = cb->data.fsp->fh->gen_id;
280 * Callback when a break-to-none event is received from the kernel.
282 * On OneFS level 1 oplocks are always broken to level 2 first, therefore an
283 * async level 2 break message is always sent when breaking to none. The
284 * downside of this is that OneFS currently has no way to express breaking
285 * directly from level 1 to none.
287 static void oplock_break_to_none_handler(uint64_t id)
289 struct onefs_callback_record *cb;
290 struct share_mode_entry sme;
291 char msg[MSG_SMB_SHARE_MODE_ENTRY_SIZE];
293 DEBUG(10, ("oplock_break_to_none_handler called for id %llu\n", id));
295 if (!(cb = onefs_find_cb(id, ONEFS_OPEN_FILE))) {
296 DEBUG(3, ("oplock_break_to_none_handler: could not find "
297 "callback id %llu\n", id));
298 return;
301 DEBUG(10, ("oplock_break_to_none_handler called for file %s\n",
302 cb->data.fsp->fsp_name));
304 init_share_mode_entry(&sme, cb, FORCE_OPLOCK_BREAK_TO_NONE);
305 share_mode_entry_to_message(msg, &sme);
306 messaging_send_buf(smbd_messaging_context(),
307 sme.pid,
308 MSG_SMB_ASYNC_LEVEL2_BREAK,
309 (uint8_t *)msg,
310 MSG_SMB_SHARE_MODE_ENTRY_SIZE);
313 * We could still receive an OPLOCK_REVOKED message, so keep the
314 * oplock_callback_id around.
319 * Callback when a break-to-level2 event is received from the kernel.
321 * Breaks from level 1 to level 2.
323 static void oplock_break_to_level_two_handler(uint64_t id)
325 struct onefs_callback_record *cb;
326 struct share_mode_entry sme;
327 char msg[MSG_SMB_SHARE_MODE_ENTRY_SIZE];
329 DEBUG(10, ("oplock_break_to_level_two_handler called for id %llu\n",
330 id));
332 if (!(cb = onefs_find_cb(id, ONEFS_OPEN_FILE))) {
333 DEBUG(3, ("oplock_break_to_level_two_handler: could not find "
334 "callback id %llu\n", id));
335 return;
338 DEBUG(10, ("oplock_break_to_level_two_handler called for file %s\n",
339 cb->data.fsp->fsp_name));
341 init_share_mode_entry(&sme, cb, LEVEL_II_OPLOCK);
342 share_mode_entry_to_message(msg, &sme);
343 messaging_send_buf(smbd_messaging_context(),
344 sme.pid,
345 MSG_SMB_BREAK_REQUEST,
346 (uint8_t *)msg,
347 MSG_SMB_SHARE_MODE_ENTRY_SIZE);
350 * We could still receive an OPLOCK_REVOKED or OPLOCK_BREAK_TO_NONE
351 * message, so keep the oplock_callback_id around.
356 * Revoke an oplock from an unresponsive client.
358 * The kernel will send this message when it times out waiting for a level 1
359 * oplock break to be acknowledged by the client. The oplock is then
360 * immediately removed.
362 static void oplock_revoked_handler(uint64_t id)
364 struct onefs_callback_record *cb;
365 files_struct *fsp = NULL;
367 DEBUG(10, ("oplock_revoked_handler called for id %llu\n", id));
369 if (!(cb = onefs_find_cb(id, ONEFS_OPEN_FILE))) {
370 DEBUG(3, ("oplock_revoked_handler: could not find "
371 "callback id %llu\n", id));
372 return;
375 fsp = cb->data.fsp;
377 SMB_ASSERT(fsp->oplock_timeout == NULL);
379 DEBUG(0,("Level 1 oplock break failed for file %s. Forcefully "
380 "revoking oplock\n", fsp->fsp_name));
382 global_client_failed_oplock_break = True;
383 remove_oplock(fsp);
386 * cb record is cleaned up in fsp ext data destructor on close, so
387 * leave it in the list.
392 * Asynchronous ifs_createfile callback
394 * If ifs_createfile had to asynchronously break any oplocks, this function is
395 * called when the kernel sends an event that the open can be retried.
397 static void semlock_available_handler(uint64_t id)
399 struct onefs_callback_record *cb;
401 DEBUG(10, ("semlock_available_handler called: %llu\n", id));
403 if (!(cb = onefs_find_cb(id, ONEFS_WAITING_FOR_OPLOCK))) {
404 DEBUG(5, ("semlock_available_handler: Did not find callback "
405 "%llu\n", id));
406 return;
409 DEBUG(10, ("Got semlock available for mid %d\n", cb->data.mid));
411 /* Paranoia check */
412 if (!(open_was_deferred(cb->data.mid))) {
413 char *msg;
414 if (asprintf(&msg, "Semlock available on an open that wasn't "
415 "deferred: %s\n",
416 onefs_callback_record_str_static(cb)) != -1) {
417 smb_panic(msg);
419 smb_panic("Semlock available on an open that wasn't "
420 "deferred\n");
423 schedule_deferred_open_smb_message(cb->data.mid);
425 /* Cleanup the callback record since the open will be retried. */
426 destroy_onefs_callback_record(id);
428 return;
432 * Asynchronous ifs_createfile failure callback
434 * If ifs_createfile had to asynchronously break any oplocks, but an error was
435 * encountered in the kernel, the open will be retried with the state->failed
436 * set to true. This will prompt the open path to send an INTERNAL_ERROR
437 * error message to the client.
439 static void semlock_async_failure_handler(uint64_t id)
441 struct onefs_callback_record *cb;
442 struct pending_message_list *pml;
443 struct deferred_open_record *state;
445 DEBUG(1, ("semlock_async_failure_handler called: %llu\n", id));
447 if (!(cb = onefs_find_cb(id, ONEFS_WAITING_FOR_OPLOCK))) {
448 DEBUG(5, ("semlock_async_failure_handler: Did not find callback "
449 "%llu\n", id));
450 return;
453 DEBUG(1, ("Got semlock_async_failure message for mid %d\n", cb->data.mid));
455 /* Paranoia check */
456 if (!(open_was_deferred(cb->data.mid))) {
457 char *msg;
458 if (asprintf(&msg, "Semlock failure on an open that wasn't "
459 "deferred: %s\n",
460 onefs_callback_record_str_static(cb)) != -1) {
461 smb_panic(msg);
463 smb_panic("Semlock failure on an open that wasn't deferred\n");
466 /* Find the actual deferred open record. */
467 if (!(pml = get_open_deferred_message(cb->data.mid))) {
468 DEBUG(0, ("Could not find deferred request for "
469 "mid %d\n", cb->data.mid));
470 destroy_onefs_callback_record(id);
471 return;
473 state = (struct deferred_open_record *)pml->private_data.data;
475 /* Update to failed so the client can be notified on retried open. */
476 state->failed = true;
478 /* Schedule deferred open for immediate retry. */
479 schedule_deferred_open_smb_message(cb->data.mid);
481 /* Cleanup the callback record here since the open will be retried. */
482 destroy_onefs_callback_record(id);
484 return;
488 * OneFS acquires all oplocks via ifs_createfile, so this is a no-op.
490 static bool onefs_set_kernel_oplock(struct kernel_oplocks *_ctx,
491 files_struct *fsp, int oplock_type) {
492 return true;
496 * Release the kernel oplock.
498 static void onefs_release_kernel_oplock(struct kernel_oplocks *_ctx,
499 files_struct *fsp, int oplock_type)
501 enum oplock_type oplock = onefs_samba_oplock_to_oplock(oplock_type);
503 DEBUG(10, ("onefs_release_kernel_oplock: Releasing %s to type %s\n",
504 fsp->fsp_name, onefs_oplock_str(oplock)));
506 if (fsp->fh->fd == -1) {
507 DEBUG(1, ("no fd\n"));
508 return;
511 /* Downgrade oplock to either SHARED or NONE. */
512 if (ifs_oplock_downgrade(fsp->fh->fd, oplock)) {
513 DEBUG(1,("ifs_oplock_downgrade failed: %s\n",
514 strerror(errno)));
519 * Wrap ifs_semlock_write so it is only called on operations that aren't
520 * already contended in the kernel.
522 static void onefs_semlock_write(int fd, enum level2_contention_type type,
523 enum semlock_operation semlock_op)
525 int ret;
527 switch (type) {
528 case LEVEL2_CONTEND_ALLOC_GROW:
529 case LEVEL2_CONTEND_POSIX_BRL:
530 DEBUG(10, ("Taking %d write semlock for cmd %d on fd: %d\n",
531 semlock_op, type, fd));
532 ret = ifs_semlock_write(fd, semlock_op);
533 if (ret) {
534 DEBUG(0,("ifs_semlock_write failed taking %d write "
535 "semlock for cmd %d on fd: %d: %s",
536 semlock_op, type, fd, strerror(errno)));
538 break;
539 default:
540 DEBUG(10, ("Skipping write semlock for cmd %d on fd: %d\n",
541 type, fd));
546 * Contend level 2 oplocks in the kernel and smbd.
548 * Taking a write semlock will contend all level 2 oplocks in all smbds across
549 * the cluster except the fsp's own level 2 oplock. This lack of
550 * self-contention is a limitation of the current OneFS kernel oplocks
551 * implementation. Luckily it is easy to contend our own level 2 oplock by
552 * checking the the fsp's oplock_type. If it's a level2, send a break message
553 * to the client and remove the oplock.
555 static void onefs_contend_level2_oplocks_begin(files_struct *fsp,
556 enum level2_contention_type type)
558 /* Take care of level 2 kernel contention. */
559 onefs_semlock_write(fsp->fh->fd, type, SEMLOCK_LOCK);
561 /* Take care of level 2 self contention. */
562 if (LEVEL_II_OPLOCK_TYPE(fsp->oplock_type))
563 break_level2_to_none_async(fsp);
567 * Unlock the write semlock when the level 2 contending operation ends.
569 static void onefs_contend_level2_oplocks_end(files_struct *fsp,
570 enum level2_contention_type type)
572 /* Take care of level 2 kernel contention. */
573 onefs_semlock_write(fsp->fh->fd, type, SEMLOCK_UNLOCK);
577 * Return string value of onefs oplock types.
579 const char *onefs_oplock_str(enum oplock_type onefs_oplock_type)
581 switch (onefs_oplock_type) {
582 case OPLOCK_NONE:
583 return "OPLOCK_NONE";
584 case OPLOCK_EXCLUSIVE:
585 return "OPLOCK_EXCLUSIVE";
586 case OPLOCK_BATCH:
587 return "OPLOCK_BATCH";
588 case OPLOCK_SHARED:
589 return "OPLOCK_SHARED";
590 default:
591 break;
593 return "UNKNOWN";
597 * Convert from onefs to samba oplock.
599 int onefs_oplock_to_samba_oplock(enum oplock_type onefs_oplock)
601 switch (onefs_oplock) {
602 case OPLOCK_NONE:
603 return NO_OPLOCK;
604 case OPLOCK_EXCLUSIVE:
605 return EXCLUSIVE_OPLOCK;
606 case OPLOCK_BATCH:
607 return BATCH_OPLOCK;
608 case OPLOCK_SHARED:
609 return LEVEL_II_OPLOCK;
610 default:
611 DEBUG(0, ("unknown oplock type %d found\n", onefs_oplock));
612 break;
614 return NO_OPLOCK;
618 * Convert from samba to onefs oplock.
620 enum oplock_type onefs_samba_oplock_to_oplock(int samba_oplock_type)
622 if (BATCH_OPLOCK_TYPE(samba_oplock_type)) return OPLOCK_BATCH;
623 if (EXCLUSIVE_OPLOCK_TYPE(samba_oplock_type)) return OPLOCK_EXCLUSIVE;
624 if (LEVEL_II_OPLOCK_TYPE(samba_oplock_type)) return OPLOCK_SHARED;
625 return OPLOCK_NONE;
629 * Oplock event handler.
631 * Call into the event system dispatcher to handle each event.
633 static void onefs_oplocks_read_fde_handler(struct event_context *ev,
634 struct fd_event *fde,
635 uint16_t flags,
636 void *private_data)
638 struct onefs_oplocks_context *ctx =
639 talloc_get_type(private_data, struct onefs_oplocks_context);
641 if (oplocks_event_dispatcher(ctx->onefs_ops)) {
642 DEBUG(0, ("oplocks_event_dispatcher failed: %s\n",
643 strerror(errno)));
648 * Setup kernel oplocks
650 static const struct kernel_oplocks_ops onefs_koplocks_ops = {
651 .set_oplock = onefs_set_kernel_oplock,
652 .release_oplock = onefs_release_kernel_oplock,
653 .contend_level2_oplocks_begin = onefs_contend_level2_oplocks_begin,
654 .contend_level2_oplocks_end = onefs_contend_level2_oplocks_end,
657 static const struct oplocks_event_ops onefs_dispatch_ops = {
658 .oplock_break_to_none = oplock_break_to_none_handler,
659 .oplock_break_to_level_two = oplock_break_to_level_two_handler,
660 .oplock_revoked = oplock_revoked_handler,
661 .semlock_available = semlock_available_handler,
662 .semlock_async_failure = semlock_async_failure_handler,
665 struct kernel_oplocks *onefs_init_kernel_oplocks(TALLOC_CTX *mem_ctx)
667 struct kernel_oplocks *_ctx = NULL;
668 struct onefs_oplocks_context *ctx = NULL;
669 struct procoptions po = PROCOPTIONS_INIT;
671 DEBUG(10, ("onefs_init_kernel_oplocks called\n"));
673 /* Set the non-blocking proc flag */
674 po.po_flags_on |= P_NON_BLOCKING_SEMLOCK;
675 if (setprocoptions(&po) != 0) {
676 DEBUG(0, ("setprocoptions failed: %s.\n", strerror(errno)));
677 return NULL;
680 /* Setup the oplock contexts */
681 _ctx = talloc_zero(mem_ctx, struct kernel_oplocks);
682 if (!_ctx) {
683 return NULL;
686 ctx = talloc_zero(_ctx, struct onefs_oplocks_context);
687 if (!ctx) {
688 goto err_out;
691 _ctx->ops = &onefs_koplocks_ops;
692 _ctx->flags = (KOPLOCKS_LEVEL2_SUPPORTED |
693 KOPLOCKS_DEFERRED_OPEN_NOTIFICATION |
694 KOPLOCKS_TIMEOUT_NOTIFICATION |
695 KOPLOCKS_OPLOCK_BROKEN_NOTIFICATION);
696 _ctx->private_data = ctx;
697 ctx->ctx = _ctx;
698 ctx->onefs_ops = &onefs_dispatch_ops;
700 /* Register an kernel event channel for oplocks */
701 ctx->onefs_event_fd = oplocks_event_register();
702 if (ctx->onefs_event_fd == -1) {
703 DEBUG(0, ("oplocks_event_register failed: %s\n",
704 strerror(errno)));
705 goto err_out;
708 DEBUG(10, ("oplock event_fd = %d\n", ctx->onefs_event_fd));
710 /* Register the oplock event_fd with samba's event system */
711 ctx->read_fde = event_add_fd(smbd_event_context(),
712 ctx,
713 ctx->onefs_event_fd,
714 EVENT_FD_READ,
715 onefs_oplocks_read_fde_handler,
716 ctx);
717 return _ctx;
719 err_out:
720 talloc_free(_ctx);
721 return NULL;
724 #else
/* Empty function defined when HAVE_ONEFS is not set. */
void oplock_onefs_dummy(void);
void oplock_onefs_dummy(void) {}
727 #endif /* HAVE_ONEFS */