Add an optimization to pthread aio writes to also do fsync if requested.
[Samba/gbeck.git] / source3 / smbd / aio.c
blob569741c747dc0fc067566ada43b8802008eba247
1 /*
2 Unix SMB/Netbios implementation.
3 Version 3.0
4 async_io read handling using POSIX async io.
5 Copyright (C) Jeremy Allison 2005.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "includes.h"
22 #include "smbd/smbd.h"
23 #include "smbd/globals.h"
24 #include "../lib/util/tevent_ntstatus.h"
26 #if defined(HAVE_AIO)
/* Signal number requested in aio_sigevent to signify that an aio
 * request is done. Can be overridden by defining RT_SIGNAL_AIO first. */
#if !defined(RT_SIGNAL_AIO)
#define RT_SIGNAL_AIO	(SIGRTMIN+3)
#endif

/* When the build only detected the sigval_* spelling of the sigevent
 * value members, map the sival_* names used in this file onto it. */
#if !defined(HAVE_STRUCT_SIGEVENT_SIGEV_VALUE_SIVAL_PTR)
#if defined(HAVE_STRUCT_SIGEVENT_SIGEV_VALUE_SIGVAL_PTR)
#define sival_int	sigval_int
#define sival_ptr	sigval_ptr
#endif
#endif
40 /****************************************************************************
41 The buffer we keep around whilst an aio request is in process.
42 *****************************************************************************/
44 struct aio_extra {
45 struct aio_extra *next, *prev;
46 SMB_STRUCT_AIOCB acb;
47 files_struct *fsp;
48 struct smb_request *smbreq;
49 DATA_BLOB outbuf;
50 struct lock_struct lock;
51 bool write_through;
52 int (*handle_completion)(struct aio_extra *ex, int errcode);
55 /****************************************************************************
56 Accessor function to return write_through state.
57 *****************************************************************************/
59 bool aio_write_through_requested(struct aio_extra *aio_ex)
61 return aio_ex->write_through;
64 /****************************************************************************
65 Initialize the signal handler for aio read/write.
66 *****************************************************************************/
68 static void smbd_aio_signal_handler(struct tevent_context *ev_ctx,
69 struct tevent_signal *se,
70 int signum, int count,
71 void *_info, void *private_data)
73 siginfo_t *info = (siginfo_t *)_info;
74 struct aio_extra *aio_ex = (struct aio_extra *)
75 info->si_value.sival_ptr;
77 smbd_aio_complete_aio_ex(aio_ex);
78 TALLOC_FREE(aio_ex);
82 bool initialize_async_io_handler(void)
84 static bool tried_signal_setup = false;
86 if (aio_signal_event) {
87 return true;
89 if (tried_signal_setup) {
90 return false;
92 tried_signal_setup = true;
94 aio_signal_event = tevent_add_signal(server_event_context(),
95 server_event_context(),
96 RT_SIGNAL_AIO, SA_SIGINFO,
97 smbd_aio_signal_handler,
98 NULL);
99 if (!aio_signal_event) {
100 DEBUG(10, ("Failed to setup RT_SIGNAL_AIO handler\n"));
101 return false;
103 return true;
/* Completion routines installed in aio_extra.handle_completion. */
static int handle_aio_read_complete(struct aio_extra *aio_ex,
				    int errcode);
static int handle_aio_write_complete(struct aio_extra *aio_ex,
				     int errcode);
static int handle_aio_smb2_read_complete(struct aio_extra *aio_ex,
					 int errcode);
static int handle_aio_smb2_write_complete(struct aio_extra *aio_ex,
					  int errcode);
111 static int aio_extra_destructor(struct aio_extra *aio_ex)
113 DLIST_REMOVE(aio_list_head, aio_ex);
114 outstanding_aio_calls--;
115 return 0;
118 /****************************************************************************
119 Create the extended aio struct we must keep around for the lifetime
120 of the aio call.
121 *****************************************************************************/
123 static struct aio_extra *create_aio_extra(TALLOC_CTX *mem_ctx,
124 files_struct *fsp,
125 size_t buflen)
127 struct aio_extra *aio_ex = talloc_zero(mem_ctx, struct aio_extra);
129 if (!aio_ex) {
130 return NULL;
133 /* The output buffer stored in the aio_ex is the start of
134 the smb return buffer. The buffer used in the acb
135 is the start of the reply data portion of that buffer. */
137 if (buflen) {
138 aio_ex->outbuf = data_blob_talloc(aio_ex, NULL, buflen);
139 if (!aio_ex->outbuf.data) {
140 TALLOC_FREE(aio_ex);
141 return NULL;
144 DLIST_ADD(aio_list_head, aio_ex);
145 talloc_set_destructor(aio_ex, aio_extra_destructor);
146 aio_ex->fsp = fsp;
147 outstanding_aio_calls++;
148 return aio_ex;
151 /****************************************************************************
152 Set up an aio request from a SMBreadX call.
153 *****************************************************************************/
155 NTSTATUS schedule_aio_read_and_X(connection_struct *conn,
156 struct smb_request *smbreq,
157 files_struct *fsp, off_t startpos,
158 size_t smb_maxcnt)
160 struct aio_extra *aio_ex;
161 SMB_STRUCT_AIOCB *a;
162 size_t bufsize;
163 size_t min_aio_read_size = lp_aio_read_size(SNUM(conn));
164 int ret;
166 if (fsp->base_fsp != NULL) {
167 /* No AIO on streams yet */
168 DEBUG(10, ("AIO on streams not yet supported\n"));
169 return NT_STATUS_RETRY;
172 if ((!min_aio_read_size || (smb_maxcnt < min_aio_read_size))
173 && !SMB_VFS_AIO_FORCE(fsp)) {
174 /* Too small a read for aio request. */
175 DEBUG(10,("schedule_aio_read_and_X: read size (%u) too small "
176 "for minimum aio_read of %u\n",
177 (unsigned int)smb_maxcnt,
178 (unsigned int)min_aio_read_size ));
179 return NT_STATUS_RETRY;
182 /* Only do this on non-chained and non-chaining reads not using the
183 * write cache. */
184 if (req_is_in_chain(smbreq) || (lp_write_cache_size(SNUM(conn)) != 0)) {
185 return NT_STATUS_RETRY;
188 if (outstanding_aio_calls >= aio_pending_size) {
189 DEBUG(10,("schedule_aio_read_and_X: Already have %d aio "
190 "activities outstanding.\n",
191 outstanding_aio_calls ));
192 return NT_STATUS_RETRY;
195 /* The following is safe from integer wrap as we've already checked
196 smb_maxcnt is 128k or less. Wct is 12 for read replies */
198 bufsize = smb_size + 12 * 2 + smb_maxcnt;
200 if ((aio_ex = create_aio_extra(NULL, fsp, bufsize)) == NULL) {
201 DEBUG(10,("schedule_aio_read_and_X: malloc fail.\n"));
202 return NT_STATUS_NO_MEMORY;
204 aio_ex->handle_completion = handle_aio_read_complete;
206 construct_reply_common_req(smbreq, (char *)aio_ex->outbuf.data);
207 srv_set_message((char *)aio_ex->outbuf.data, 12, 0, True);
208 SCVAL(aio_ex->outbuf.data,smb_vwv0,0xFF); /* Never a chained reply. */
210 init_strict_lock_struct(fsp, (uint64_t)smbreq->smbpid,
211 (uint64_t)startpos, (uint64_t)smb_maxcnt, READ_LOCK,
212 &aio_ex->lock);
214 /* Take the lock until the AIO completes. */
215 if (!SMB_VFS_STRICT_LOCK(conn, fsp, &aio_ex->lock)) {
216 TALLOC_FREE(aio_ex);
217 return NT_STATUS_FILE_LOCK_CONFLICT;
220 a = &aio_ex->acb;
222 /* Now set up the aio record for the read call. */
224 a->aio_fildes = fsp->fh->fd;
225 a->aio_buf = smb_buf(aio_ex->outbuf.data);
226 a->aio_nbytes = smb_maxcnt;
227 a->aio_offset = startpos;
228 a->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
229 a->aio_sigevent.sigev_signo = RT_SIGNAL_AIO;
230 a->aio_sigevent.sigev_value.sival_ptr = aio_ex;
232 ret = SMB_VFS_AIO_READ(fsp, a);
233 if (ret == -1) {
234 DEBUG(0,("schedule_aio_read_and_X: aio_read failed. "
235 "Error %s\n", strerror(errno) ));
236 SMB_VFS_STRICT_UNLOCK(conn, fsp, &aio_ex->lock);
237 TALLOC_FREE(aio_ex);
238 return NT_STATUS_RETRY;
241 aio_ex->smbreq = talloc_move(aio_ex, &smbreq);
243 DEBUG(10,("schedule_aio_read_and_X: scheduled aio_read for file %s, "
244 "offset %.0f, len = %u (mid = %u)\n",
245 fsp_str_dbg(fsp), (double)startpos, (unsigned int)smb_maxcnt,
246 (unsigned int)aio_ex->smbreq->mid ));
248 return NT_STATUS_OK;
251 /****************************************************************************
252 Set up an aio request from a SMBwriteX call.
253 *****************************************************************************/
255 NTSTATUS schedule_aio_write_and_X(connection_struct *conn,
256 struct smb_request *smbreq,
257 files_struct *fsp, const char *data,
258 off_t startpos,
259 size_t numtowrite)
261 struct aio_extra *aio_ex;
262 SMB_STRUCT_AIOCB *a;
263 size_t bufsize;
264 size_t min_aio_write_size = lp_aio_write_size(SNUM(conn));
265 int ret;
267 if (fsp->base_fsp != NULL) {
268 /* No AIO on streams yet */
269 DEBUG(10, ("AIO on streams not yet supported\n"));
270 return NT_STATUS_RETRY;
273 if ((!min_aio_write_size || (numtowrite < min_aio_write_size))
274 && !SMB_VFS_AIO_FORCE(fsp)) {
275 /* Too small a write for aio request. */
276 DEBUG(10,("schedule_aio_write_and_X: write size (%u) too "
277 "small for minimum aio_write of %u\n",
278 (unsigned int)numtowrite,
279 (unsigned int)min_aio_write_size ));
280 return NT_STATUS_RETRY;
283 /* Only do this on non-chained and non-chaining writes not using the
284 * write cache. */
285 if (req_is_in_chain(smbreq) || (lp_write_cache_size(SNUM(conn)) != 0)) {
286 return NT_STATUS_RETRY;
289 if (outstanding_aio_calls >= aio_pending_size) {
290 DEBUG(3,("schedule_aio_write_and_X: Already have %d aio "
291 "activities outstanding.\n",
292 outstanding_aio_calls ));
293 DEBUG(10,("schedule_aio_write_and_X: failed to schedule "
294 "aio_write for file %s, offset %.0f, len = %u "
295 "(mid = %u)\n",
296 fsp_str_dbg(fsp), (double)startpos,
297 (unsigned int)numtowrite,
298 (unsigned int)smbreq->mid ));
299 return NT_STATUS_RETRY;
302 bufsize = smb_size + 6*2;
304 if (!(aio_ex = create_aio_extra(NULL, fsp, bufsize))) {
305 DEBUG(0,("schedule_aio_write_and_X: malloc fail.\n"));
306 return NT_STATUS_NO_MEMORY;
308 aio_ex->handle_completion = handle_aio_write_complete;
309 aio_ex->write_through = BITSETW(smbreq->vwv+7,0);
311 construct_reply_common_req(smbreq, (char *)aio_ex->outbuf.data);
312 srv_set_message((char *)aio_ex->outbuf.data, 6, 0, True);
313 SCVAL(aio_ex->outbuf.data,smb_vwv0,0xFF); /* Never a chained reply. */
315 init_strict_lock_struct(fsp, (uint64_t)smbreq->smbpid,
316 (uint64_t)startpos, (uint64_t)numtowrite, WRITE_LOCK,
317 &aio_ex->lock);
319 /* Take the lock until the AIO completes. */
320 if (!SMB_VFS_STRICT_LOCK(conn, fsp, &aio_ex->lock)) {
321 TALLOC_FREE(aio_ex);
322 return NT_STATUS_FILE_LOCK_CONFLICT;
325 a = &aio_ex->acb;
327 /* Now set up the aio record for the write call. */
329 a->aio_fildes = fsp->fh->fd;
330 a->aio_buf = discard_const_p(char, data);
331 a->aio_nbytes = numtowrite;
332 a->aio_offset = startpos;
333 a->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
334 a->aio_sigevent.sigev_signo = RT_SIGNAL_AIO;
335 a->aio_sigevent.sigev_value.sival_ptr = aio_ex;
337 ret = SMB_VFS_AIO_WRITE(fsp, a);
338 if (ret == -1) {
339 DEBUG(3,("schedule_aio_wrote_and_X: aio_write failed. "
340 "Error %s\n", strerror(errno) ));
341 SMB_VFS_STRICT_UNLOCK(conn, fsp, &aio_ex->lock);
342 TALLOC_FREE(aio_ex);
343 return NT_STATUS_RETRY;
346 aio_ex->smbreq = talloc_move(aio_ex, &smbreq);
348 /* This should actually be improved to span the write. */
349 contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WRITE);
350 contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WRITE);
352 if (!aio_ex->write_through && !lp_syncalways(SNUM(fsp->conn))
353 && fsp->aio_write_behind) {
354 /* Lie to the client and immediately claim we finished the
355 * write. */
356 SSVAL(aio_ex->outbuf.data,smb_vwv2,numtowrite);
357 SSVAL(aio_ex->outbuf.data,smb_vwv4,(numtowrite>>16)&1);
358 show_msg((char *)aio_ex->outbuf.data);
359 if (!srv_send_smb(aio_ex->smbreq->sconn,
360 (char *)aio_ex->outbuf.data,
361 true, aio_ex->smbreq->seqnum+1,
362 IS_CONN_ENCRYPTED(fsp->conn),
363 &aio_ex->smbreq->pcd)) {
364 exit_server_cleanly("schedule_aio_write_and_X: "
365 "srv_send_smb failed.");
367 DEBUG(10,("schedule_aio_write_and_X: scheduled aio_write "
368 "behind for file %s\n", fsp_str_dbg(fsp)));
371 DEBUG(10,("schedule_aio_write_and_X: scheduled aio_write for file "
372 "%s, offset %.0f, len = %u (mid = %u) "
373 "outstanding_aio_calls = %d\n",
374 fsp_str_dbg(fsp), (double)startpos, (unsigned int)numtowrite,
375 (unsigned int)aio_ex->smbreq->mid, outstanding_aio_calls ));
377 return NT_STATUS_OK;
380 bool cancel_smb2_aio(struct smb_request *smbreq)
382 struct smbd_smb2_request *smb2req = smbreq->smb2req;
383 struct aio_extra *aio_ex = NULL;
384 int ret;
386 if (smb2req) {
387 aio_ex = talloc_get_type(smbreq->async_priv,
388 struct aio_extra);
391 if (aio_ex == NULL) {
392 return false;
395 if (aio_ex->fsp == NULL) {
396 return false;
399 ret = SMB_VFS_AIO_CANCEL(aio_ex->fsp, &aio_ex->acb);
400 if (ret != AIO_CANCELED) {
401 return false;
404 return true;
407 /****************************************************************************
408 Set up an aio request from a SMB2 read call.
409 *****************************************************************************/
411 NTSTATUS schedule_smb2_aio_read(connection_struct *conn,
412 struct smb_request *smbreq,
413 files_struct *fsp,
414 TALLOC_CTX *ctx,
415 DATA_BLOB *preadbuf,
416 off_t startpos,
417 size_t smb_maxcnt)
419 struct aio_extra *aio_ex;
420 SMB_STRUCT_AIOCB *a;
421 size_t min_aio_read_size = lp_aio_read_size(SNUM(conn));
422 int ret;
424 if (fsp->base_fsp != NULL) {
425 /* No AIO on streams yet */
426 DEBUG(10, ("AIO on streams not yet supported\n"));
427 return NT_STATUS_RETRY;
430 if ((!min_aio_read_size || (smb_maxcnt < min_aio_read_size))
431 && !SMB_VFS_AIO_FORCE(fsp)) {
432 /* Too small a read for aio request. */
433 DEBUG(10,("smb2: read size (%u) too small "
434 "for minimum aio_read of %u\n",
435 (unsigned int)smb_maxcnt,
436 (unsigned int)min_aio_read_size ));
437 return NT_STATUS_RETRY;
440 /* Only do this on reads not using the write cache. */
441 if (lp_write_cache_size(SNUM(conn)) != 0) {
442 return NT_STATUS_RETRY;
445 if (outstanding_aio_calls >= aio_pending_size) {
446 DEBUG(10,("smb2: Already have %d aio "
447 "activities outstanding.\n",
448 outstanding_aio_calls ));
449 return NT_STATUS_RETRY;
452 /* Create the out buffer. */
453 *preadbuf = data_blob_talloc(ctx, NULL, smb_maxcnt);
454 if (preadbuf->data == NULL) {
455 return NT_STATUS_NO_MEMORY;
458 if (!(aio_ex = create_aio_extra(smbreq->smb2req, fsp, 0))) {
459 return NT_STATUS_NO_MEMORY;
461 aio_ex->handle_completion = handle_aio_smb2_read_complete;
463 init_strict_lock_struct(fsp, (uint64_t)smbreq->smbpid,
464 (uint64_t)startpos, (uint64_t)smb_maxcnt, READ_LOCK,
465 &aio_ex->lock);
467 /* Take the lock until the AIO completes. */
468 if (!SMB_VFS_STRICT_LOCK(conn, fsp, &aio_ex->lock)) {
469 TALLOC_FREE(aio_ex);
470 return NT_STATUS_FILE_LOCK_CONFLICT;
473 a = &aio_ex->acb;
475 /* Now set up the aio record for the read call. */
477 a->aio_fildes = fsp->fh->fd;
478 a->aio_buf = preadbuf->data;
479 a->aio_nbytes = smb_maxcnt;
480 a->aio_offset = startpos;
481 a->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
482 a->aio_sigevent.sigev_signo = RT_SIGNAL_AIO;
483 a->aio_sigevent.sigev_value.sival_ptr = aio_ex;
485 ret = SMB_VFS_AIO_READ(fsp, a);
486 if (ret == -1) {
487 DEBUG(0,("smb2: aio_read failed. "
488 "Error %s\n", strerror(errno) ));
489 SMB_VFS_STRICT_UNLOCK(conn, fsp, &aio_ex->lock);
490 TALLOC_FREE(aio_ex);
491 return NT_STATUS_RETRY;
494 /* We don't need talloc_move here as both aio_ex and
495 * smbreq are children of smbreq->smb2req. */
496 aio_ex->smbreq = smbreq;
497 smbreq->async_priv = aio_ex;
499 DEBUG(10,("smb2: scheduled aio_read for file %s, "
500 "offset %.0f, len = %u (mid = %u)\n",
501 fsp_str_dbg(fsp), (double)startpos, (unsigned int)smb_maxcnt,
502 (unsigned int)aio_ex->smbreq->mid ));
504 return NT_STATUS_OK;
507 /****************************************************************************
508 Set up an aio request from a SMB2write call.
509 *****************************************************************************/
511 NTSTATUS schedule_aio_smb2_write(connection_struct *conn,
512 struct smb_request *smbreq,
513 files_struct *fsp,
514 uint64_t in_offset,
515 DATA_BLOB in_data,
516 bool write_through)
518 struct aio_extra *aio_ex = NULL;
519 SMB_STRUCT_AIOCB *a = NULL;
520 size_t min_aio_write_size = lp_aio_write_size(SNUM(conn));
521 int ret;
523 if (fsp->base_fsp != NULL) {
524 /* No AIO on streams yet */
525 DEBUG(10, ("AIO on streams not yet supported\n"));
526 return NT_STATUS_RETRY;
529 if ((!min_aio_write_size || (in_data.length < min_aio_write_size))
530 && !SMB_VFS_AIO_FORCE(fsp)) {
531 /* Too small a write for aio request. */
532 DEBUG(10,("smb2: write size (%u) too "
533 "small for minimum aio_write of %u\n",
534 (unsigned int)in_data.length,
535 (unsigned int)min_aio_write_size ));
536 return NT_STATUS_RETRY;
539 /* Only do this on writes not using the write cache. */
540 if (lp_write_cache_size(SNUM(conn)) != 0) {
541 return NT_STATUS_RETRY;
544 if (outstanding_aio_calls >= aio_pending_size) {
545 DEBUG(3,("smb2: Already have %d aio "
546 "activities outstanding.\n",
547 outstanding_aio_calls ));
548 return NT_STATUS_RETRY;
551 if (!(aio_ex = create_aio_extra(smbreq->smb2req, fsp, 0))) {
552 return NT_STATUS_NO_MEMORY;
555 aio_ex->handle_completion = handle_aio_smb2_write_complete;
556 aio_ex->write_through = write_through;
558 init_strict_lock_struct(fsp, (uint64_t)smbreq->smbpid,
559 in_offset, (uint64_t)in_data.length, WRITE_LOCK,
560 &aio_ex->lock);
562 /* Take the lock until the AIO completes. */
563 if (!SMB_VFS_STRICT_LOCK(conn, fsp, &aio_ex->lock)) {
564 TALLOC_FREE(aio_ex);
565 return NT_STATUS_FILE_LOCK_CONFLICT;
568 a = &aio_ex->acb;
570 /* Now set up the aio record for the write call. */
572 a->aio_fildes = fsp->fh->fd;
573 a->aio_buf = in_data.data;
574 a->aio_nbytes = in_data.length;
575 a->aio_offset = in_offset;
576 a->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
577 a->aio_sigevent.sigev_signo = RT_SIGNAL_AIO;
578 a->aio_sigevent.sigev_value.sival_ptr = aio_ex;
580 ret = SMB_VFS_AIO_WRITE(fsp, a);
581 if (ret == -1) {
582 DEBUG(3,("smb2: aio_write failed. "
583 "Error %s\n", strerror(errno) ));
584 SMB_VFS_STRICT_UNLOCK(conn, fsp, &aio_ex->lock);
585 TALLOC_FREE(aio_ex);
586 return NT_STATUS_RETRY;
589 /* We don't need talloc_move here as both aio_ex and
590 * smbreq are children of smbreq->smb2req. */
591 aio_ex->smbreq = smbreq;
592 smbreq->async_priv = aio_ex;
594 /* This should actually be improved to span the write. */
595 contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WRITE);
596 contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WRITE);
599 * We don't want to do write behind due to ownership
600 * issues of the request structs. Maybe add it if I
601 * figure those out. JRA.
604 DEBUG(10,("smb2: scheduled aio_write for file "
605 "%s, offset %.0f, len = %u (mid = %u) "
606 "outstanding_aio_calls = %d\n",
607 fsp_str_dbg(fsp),
608 (double)in_offset,
609 (unsigned int)in_data.length,
610 (unsigned int)aio_ex->smbreq->mid,
611 outstanding_aio_calls ));
613 return NT_STATUS_OK;
616 /****************************************************************************
617 Complete the read and return the data or error back to the client.
618 Returns errno or zero if all ok.
619 *****************************************************************************/
621 static int handle_aio_read_complete(struct aio_extra *aio_ex, int errcode)
623 int outsize;
624 char *outbuf = (char *)aio_ex->outbuf.data;
625 char *data = smb_buf(outbuf);
626 ssize_t nread = SMB_VFS_AIO_RETURN(aio_ex->fsp,&aio_ex->acb);
628 if (nread < 0) {
629 /* We're relying here on the fact that if the fd is
630 closed then the aio will complete and aio_return
631 will return an error. Hopefully this is
632 true.... JRA. */
634 DEBUG( 3,( "handle_aio_read_complete: file %s nread == %d. "
635 "Error = %s\n",
636 fsp_str_dbg(aio_ex->fsp), (int)nread, strerror(errcode)));
638 ERROR_NT(map_nt_error_from_unix(errcode));
639 outsize = srv_set_message(outbuf,0,0,true);
640 } else {
641 outsize = srv_set_message(outbuf,12,nread,False);
642 SSVAL(outbuf,smb_vwv2,0xFFFF); /* Remaining - must be * -1. */
643 SSVAL(outbuf,smb_vwv5,nread);
644 SSVAL(outbuf,smb_vwv6,smb_offset(data,outbuf));
645 SSVAL(outbuf,smb_vwv7,((nread >> 16) & 1));
646 SSVAL(smb_buf(outbuf),-2,nread);
648 aio_ex->fsp->fh->pos = aio_ex->acb.aio_offset + nread;
649 aio_ex->fsp->fh->position_information = aio_ex->fsp->fh->pos;
651 DEBUG( 3, ( "handle_aio_read_complete file %s max=%d "
652 "nread=%d\n",
653 fsp_str_dbg(aio_ex->fsp),
654 (int)aio_ex->acb.aio_nbytes, (int)nread ) );
657 smb_setlen(outbuf,outsize - 4);
658 show_msg(outbuf);
659 if (!srv_send_smb(aio_ex->smbreq->sconn, outbuf,
660 true, aio_ex->smbreq->seqnum+1,
661 IS_CONN_ENCRYPTED(aio_ex->fsp->conn), NULL)) {
662 exit_server_cleanly("handle_aio_read_complete: srv_send_smb "
663 "failed.");
666 DEBUG(10,("handle_aio_read_complete: scheduled aio_read completed "
667 "for file %s, offset %.0f, len = %u\n",
668 fsp_str_dbg(aio_ex->fsp), (double)aio_ex->acb.aio_offset,
669 (unsigned int)nread ));
671 return errcode;
674 /****************************************************************************
675 Complete the write and return the data or error back to the client.
676 Returns error code or zero if all ok.
677 *****************************************************************************/
679 static int handle_aio_write_complete(struct aio_extra *aio_ex, int errcode)
681 files_struct *fsp = aio_ex->fsp;
682 char *outbuf = (char *)aio_ex->outbuf.data;
683 ssize_t numtowrite = aio_ex->acb.aio_nbytes;
684 ssize_t nwritten = SMB_VFS_AIO_RETURN(fsp,&aio_ex->acb);
686 if (fsp->aio_write_behind) {
687 if (nwritten != numtowrite) {
688 if (nwritten == -1) {
689 DEBUG(5,("handle_aio_write_complete: "
690 "aio_write_behind failed ! File %s "
691 "is corrupt ! Error %s\n",
692 fsp_str_dbg(fsp), strerror(errcode)));
693 } else {
694 DEBUG(0,("handle_aio_write_complete: "
695 "aio_write_behind failed ! File %s "
696 "is corrupt ! Wanted %u bytes but "
697 "only wrote %d\n", fsp_str_dbg(fsp),
698 (unsigned int)numtowrite,
699 (int)nwritten ));
700 errcode = EIO;
702 } else {
703 DEBUG(10,("handle_aio_write_complete: "
704 "aio_write_behind completed for file %s\n",
705 fsp_str_dbg(fsp)));
707 /* TODO: should no return 0 in case of an error !!! */
708 return 0;
711 /* We don't need outsize or set_message here as we've already set the
712 fixed size length when we set up the aio call. */
714 if(nwritten == -1) {
715 DEBUG( 3,( "handle_aio_write: file %s wanted %u bytes. "
716 "nwritten == %d. Error = %s\n",
717 fsp_str_dbg(fsp), (unsigned int)numtowrite,
718 (int)nwritten, strerror(errcode) ));
720 ERROR_NT(map_nt_error_from_unix(errcode));
721 srv_set_message(outbuf,0,0,true);
722 } else {
723 NTSTATUS status;
725 SSVAL(outbuf,smb_vwv2,nwritten);
726 SSVAL(outbuf,smb_vwv4,(nwritten>>16)&1);
727 if (nwritten < (ssize_t)numtowrite) {
728 SCVAL(outbuf,smb_rcls,ERRHRD);
729 SSVAL(outbuf,smb_err,ERRdiskfull);
732 DEBUG(3,("handle_aio_write: %s, num=%d wrote=%d\n",
733 fsp_fnum_dbg(fsp), (int)numtowrite, (int)nwritten));
734 status = sync_file(fsp->conn,fsp, aio_ex->write_through);
735 if (!NT_STATUS_IS_OK(status)) {
736 errcode = errno;
737 ERROR_BOTH(map_nt_error_from_unix(errcode),
738 ERRHRD, ERRdiskfull);
739 srv_set_message(outbuf,0,0,true);
740 DEBUG(5,("handle_aio_write: sync_file for %s returned %s\n",
741 fsp_str_dbg(fsp), nt_errstr(status)));
744 aio_ex->fsp->fh->pos = aio_ex->acb.aio_offset + nwritten;
746 mark_file_modified(aio_ex->fsp);
749 show_msg(outbuf);
750 if (!srv_send_smb(aio_ex->smbreq->sconn, outbuf,
751 true, aio_ex->smbreq->seqnum+1,
752 IS_CONN_ENCRYPTED(fsp->conn),
753 NULL)) {
754 exit_server_cleanly("handle_aio_write_complete: "
755 "srv_send_smb failed.");
758 DEBUG(10,("handle_aio_write_complete: scheduled aio_write completed "
759 "for file %s, offset %.0f, requested %u, written = %u\n",
760 fsp_str_dbg(fsp), (double)aio_ex->acb.aio_offset,
761 (unsigned int)numtowrite, (unsigned int)nwritten ));
763 return errcode;
766 /****************************************************************************
767 Complete the read and return the data or error back to the client.
768 Returns errno or zero if all ok.
769 *****************************************************************************/
771 static int handle_aio_smb2_read_complete(struct aio_extra *aio_ex, int errcode)
773 NTSTATUS status;
774 struct tevent_req *subreq = aio_ex->smbreq->smb2req->subreq;
775 ssize_t nread = SMB_VFS_AIO_RETURN(aio_ex->fsp,&aio_ex->acb);
777 /* Common error or success code processing for async or sync
778 read returns. */
780 status = smb2_read_complete(subreq, nread, errcode);
782 if (nread > 0) {
783 aio_ex->fsp->fh->pos = aio_ex->acb.aio_offset + nread;
784 aio_ex->fsp->fh->position_information = aio_ex->fsp->fh->pos;
787 DEBUG(10,("smb2: scheduled aio_read completed "
788 "for file %s, offset %.0f, len = %u "
789 "(errcode = %d, NTSTATUS = %s)\n",
790 fsp_str_dbg(aio_ex->fsp),
791 (double)aio_ex->acb.aio_offset,
792 (unsigned int)nread,
793 errcode,
794 nt_errstr(status) ));
796 if (!NT_STATUS_IS_OK(status)) {
797 tevent_req_nterror(subreq, status);
798 return errcode;
801 tevent_req_done(subreq);
802 return errcode;
805 /****************************************************************************
806 Complete the SMB2 write and return the data or error back to the client.
807 Returns error code or zero if all ok.
808 *****************************************************************************/
810 static int handle_aio_smb2_write_complete(struct aio_extra *aio_ex, int errcode)
812 files_struct *fsp = aio_ex->fsp;
813 ssize_t numtowrite = aio_ex->acb.aio_nbytes;
814 ssize_t nwritten = SMB_VFS_AIO_RETURN(fsp,&aio_ex->acb);
815 struct tevent_req *subreq = aio_ex->smbreq->smb2req->subreq;
816 NTSTATUS status;
818 status = smb2_write_complete(subreq, nwritten, errcode);
820 DEBUG(10,("smb2: scheduled aio_write completed "
821 "for file %s, offset %.0f, requested %u, "
822 "written = %u (errcode = %d, NTSTATUS = %s)\n",
823 fsp_str_dbg(fsp),
824 (double)aio_ex->acb.aio_offset,
825 (unsigned int)numtowrite,
826 (unsigned int)nwritten,
827 errcode,
828 nt_errstr(status) ));
830 if (!NT_STATUS_IS_OK(status)) {
831 tevent_req_nterror(subreq, status);
832 return errcode;
835 mark_file_modified(fsp);
837 tevent_req_done(subreq);
838 return errcode;
841 /****************************************************************************
842 Handle any aio completion. Returns True if finished (and sets *perr if err
843 was non-zero), False if not.
844 *****************************************************************************/
846 static bool handle_aio_completed(struct aio_extra *aio_ex, int *perr)
848 files_struct *fsp = NULL;
849 int err;
851 if(!aio_ex) {
852 DEBUG(3, ("handle_aio_completed: Non-existing aio_ex passed\n"));
853 return false;
856 if (!aio_ex->fsp) {
857 DEBUG(3, ("handle_aio_completed: aio_ex->fsp == NULL\n"));
858 return false;
861 fsp = aio_ex->fsp;
863 /* Ensure the operation has really completed. */
864 err = SMB_VFS_AIO_ERROR(fsp, &aio_ex->acb);
865 if (err == EINPROGRESS) {
866 DEBUG(10,( "handle_aio_completed: operation mid %llu still in "
867 "process for file %s\n",
868 (unsigned long long)aio_ex->smbreq->mid,
869 fsp_str_dbg(aio_ex->fsp)));
870 return False;
873 if (err == ECANCELED) {
874 DEBUG(10,( "handle_aio_completed: operation mid %llu canceled "
875 "for file %s\n",
876 (unsigned long long)aio_ex->smbreq->mid,
877 fsp_str_dbg(aio_ex->fsp)));
880 /* Unlock now we're done. */
881 SMB_VFS_STRICT_UNLOCK(fsp->conn, fsp, &aio_ex->lock);
883 err = aio_ex->handle_completion(aio_ex, err);
884 if (err) {
885 *perr = err; /* Only save non-zero errors. */
888 return True;
891 /****************************************************************************
892 Handle any aio completion inline.
893 *****************************************************************************/
895 void smbd_aio_complete_aio_ex(struct aio_extra *aio_ex)
897 files_struct *fsp = NULL;
898 int ret = 0;
900 DEBUG(10,("smbd_aio_complete_mid: mid[%llu]\n",
901 (unsigned long long)aio_ex->smbreq->mid));
903 fsp = aio_ex->fsp;
904 if (fsp == NULL) {
905 /* file was closed whilst I/O was outstanding. Just
906 * ignore. */
907 DEBUG( 3,( "smbd_aio_complete_mid: file closed whilst "
908 "aio outstanding (mid[%llu]).\n",
909 (unsigned long long)aio_ex->smbreq->mid));
910 return;
913 if (!handle_aio_completed(aio_ex, &ret)) {
914 return;
918 /****************************************************************************
919 We're doing write behind and the client closed the file. Wait up to 45
920 seconds (my arbitrary choice) for the aio to complete. Return 0 if all writes
921 completed, errno to return if not.
922 *****************************************************************************/
924 #define SMB_TIME_FOR_AIO_COMPLETE_WAIT 45
926 int wait_for_aio_completion(files_struct *fsp)
928 struct aio_extra *aio_ex;
929 const SMB_STRUCT_AIOCB **aiocb_list;
930 int aio_completion_count = 0;
931 time_t start_time = time_mono(NULL);
932 int seconds_left;
934 for (seconds_left = SMB_TIME_FOR_AIO_COMPLETE_WAIT;
935 seconds_left >= 0;) {
936 int err = 0;
937 int i;
938 struct timespec ts;
940 aio_completion_count = 0;
941 for( aio_ex = aio_list_head; aio_ex; aio_ex = aio_ex->next) {
942 if (aio_ex->fsp == fsp) {
943 aio_completion_count++;
947 if (!aio_completion_count) {
948 return 0;
951 DEBUG(3,("wait_for_aio_completion: waiting for %d aio events "
952 "to complete.\n", aio_completion_count ));
954 aiocb_list = SMB_MALLOC_ARRAY(const SMB_STRUCT_AIOCB *,
955 aio_completion_count);
956 if (!aiocb_list) {
957 return ENOMEM;
960 for( i = 0, aio_ex = aio_list_head;
961 aio_ex;
962 aio_ex = aio_ex->next) {
963 if (aio_ex->fsp == fsp) {
964 aiocb_list[i++] = &aio_ex->acb;
968 /* Now wait up to seconds_left for completion. */
969 ts.tv_sec = seconds_left;
970 ts.tv_nsec = 0;
972 DEBUG(10,("wait_for_aio_completion: %d events, doing a wait "
973 "of %d seconds.\n",
974 aio_completion_count, seconds_left ));
976 err = SMB_VFS_AIO_SUSPEND(fsp, aiocb_list,
977 aio_completion_count, &ts);
979 DEBUG(10,("wait_for_aio_completion: returned err = %d, "
980 "errno = %s\n", err, strerror(errno) ));
982 if (err == -1 && errno == EAGAIN) {
983 DEBUG(0,("wait_for_aio_completion: aio_suspend timed "
984 "out waiting for %d events after a wait of "
985 "%d seconds\n", aio_completion_count,
986 seconds_left));
987 /* Timeout. */
988 SAFE_FREE(aiocb_list);
989 /* We're hosed here - IO may complete
990 and trample over memory if we free
991 the aio_ex struct, but if we don't
992 we leak IO requests. I think smb_panic()
993 if the right thing to do here. JRA.
995 smb_panic("AIO suspend timed out - cannot continue.");
996 return EIO;
999 /* One or more events might have completed - process them if
1000 * so. */
1001 for( i = 0; i < aio_completion_count; i++) {
1002 aio_ex = (struct aio_extra *)aiocb_list[i]->aio_sigevent.sigev_value.sival_ptr;
1004 if (!handle_aio_completed(aio_ex, &err)) {
1005 continue;
1007 TALLOC_FREE(aio_ex);
1010 SAFE_FREE(aiocb_list);
1011 seconds_left = SMB_TIME_FOR_AIO_COMPLETE_WAIT
1012 - (time_mono(NULL) - start_time);
1015 /* We timed out - we don't know why. Return ret if already an error,
1016 * else EIO. */
1017 DEBUG(10,("wait_for_aio_completion: aio_suspend timed out waiting "
1018 "for %d events\n",
1019 aio_completion_count));
1021 return EIO;
1024 #else
1026 bool initialize_async_io_handler(void)
1028 return false;
1031 NTSTATUS schedule_aio_read_and_X(connection_struct *conn,
1032 struct smb_request *smbreq,
1033 files_struct *fsp, off_t startpos,
1034 size_t smb_maxcnt)
1036 return NT_STATUS_RETRY;
1039 NTSTATUS schedule_aio_write_and_X(connection_struct *conn,
1040 struct smb_request *smbreq,
1041 files_struct *fsp, const char *data,
1042 off_t startpos,
1043 size_t numtowrite)
1045 return NT_STATUS_RETRY;
1048 bool cancel_smb2_aio(struct smb_request *smbreq)
1050 return false;
1053 NTSTATUS schedule_smb2_aio_read(connection_struct *conn,
1054 struct smb_request *smbreq,
1055 files_struct *fsp,
1056 TALLOC_CTX *ctx,
1057 DATA_BLOB *preadbuf,
1058 off_t startpos,
1059 size_t smb_maxcnt)
1061 return NT_STATUS_RETRY;
1064 NTSTATUS schedule_aio_smb2_write(connection_struct *conn,
1065 struct smb_request *smbreq,
1066 files_struct *fsp,
1067 uint64_t in_offset,
1068 DATA_BLOB in_data,
1069 bool write_through)
1071 return NT_STATUS_RETRY;
1074 int wait_for_aio_completion(files_struct *fsp)
1076 return 0;
1079 void smbd_aio_complete_mid(uint64_t mid);
1081 #endif