2 Unix SMB/Netbios implementation.
4 async_io read handling using POSIX async io.
5 Copyright (C) Jeremy Allison 2005.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "smbd/globals.h"
26 /* The signal we'll use to signify aio done. */
28 #define RT_SIGNAL_AIO (SIGRTMIN+3)
31 #ifndef HAVE_STRUCT_SIGEVENT_SIGEV_VALUE_SIVAL_PTR
32 #ifdef HAVE_STRUCT_SIGEVENT_SIGEV_VALUE_SIGVAL_PTR
33 #define sival_int sigval_int
34 #define sival_ptr sigval_ptr
38 /****************************************************************************
39 The buffer we keep around whilst an aio request is in process.
40 *****************************************************************************/
43 struct aio_extra
*next
, *prev
;
46 struct smb_request
*req
;
48 int (*handle_completion
)(struct aio_extra
*ex
, int errcode
);
51 static int handle_aio_read_complete(struct aio_extra
*aio_ex
, int errcode
);
52 static int handle_aio_write_complete(struct aio_extra
*aio_ex
, int errcode
);
54 static int aio_extra_destructor(struct aio_extra
*aio_ex
)
56 DLIST_REMOVE(aio_list_head
, aio_ex
);
60 /****************************************************************************
61 Create the extended aio struct we must keep around for the lifetime of the aio call.
63 *****************************************************************************/
65 static struct aio_extra
*create_aio_extra(files_struct
*fsp
, size_t buflen
)
67 struct aio_extra
*aio_ex
= TALLOC_ZERO_P(NULL
, struct aio_extra
);
73 /* The output buffer stored in the aio_ex is the start of
74 the smb return buffer. The buffer used in the acb
75 is the start of the reply data portion of that buffer. */
77 aio_ex
->outbuf
= TALLOC_ARRAY(aio_ex
, char, buflen
);
78 if (!aio_ex
->outbuf
) {
82 DLIST_ADD(aio_list_head
, aio_ex
);
83 talloc_set_destructor(aio_ex
, aio_extra_destructor
);
88 /****************************************************************************
89 Given the mid find the extended aio struct containing it.
90 *****************************************************************************/
92 static struct aio_extra
*find_aio_ex(uint16 mid
)
96 for( p
= aio_list_head
; p
; p
= p
->next
) {
97 if (mid
== p
->req
->mid
) {
104 /****************************************************************************
105 We can have this many aio buffers in flight.
106 *****************************************************************************/
108 /****************************************************************************
109 Set up an aio request from a SMBreadX call.
110 *****************************************************************************/
112 bool schedule_aio_read_and_X(connection_struct
*conn
,
113 struct smb_request
*req
,
114 files_struct
*fsp
, SMB_OFF_T startpos
,
117 struct aio_extra
*aio_ex
;
120 size_t min_aio_read_size
= lp_aio_read_size(SNUM(conn
));
123 if (fsp
->base_fsp
!= NULL
) {
124 /* No AIO on streams yet */
125 DEBUG(10, ("AIO on streams not yet supported\n"));
129 if ((!min_aio_read_size
|| (smb_maxcnt
< min_aio_read_size
))
130 && !SMB_VFS_AIO_FORCE(fsp
)) {
131 /* Too small a read for aio request. */
132 DEBUG(10,("schedule_aio_read_and_X: read size (%u) too small "
133 "for minimum aio_read of %u\n",
134 (unsigned int)smb_maxcnt
,
135 (unsigned int)min_aio_read_size
));
139 /* Only do this on non-chained and non-chaining reads not using the write cache. */
141 if (req_is_in_chain(req
) || (lp_write_cache_size(SNUM(conn
)) != 0)) {
145 if (outstanding_aio_calls
>= aio_pending_size
) {
146 DEBUG(10,("schedule_aio_read_and_X: Already have %d aio "
147 "activities outstanding.\n",
148 outstanding_aio_calls
));
152 /* The following is safe from integer wrap as we've already checked
153 smb_maxcnt is 128k or less. Wct is 12 for read replies */
155 bufsize
= smb_size
+ 12 * 2 + smb_maxcnt
;
157 if ((aio_ex
= create_aio_extra(fsp
, bufsize
)) == NULL
) {
158 DEBUG(10,("schedule_aio_read_and_X: malloc fail.\n"));
161 aio_ex
->handle_completion
= handle_aio_read_complete
;
163 construct_reply_common_req(req
, aio_ex
->outbuf
);
164 srv_set_message(aio_ex
->outbuf
, 12, 0, True
);
165 SCVAL(aio_ex
->outbuf
,smb_vwv0
,0xFF); /* Never a chained reply. */
169 /* Now set up the aio record for the read call. */
171 a
->aio_fildes
= fsp
->fh
->fd
;
172 a
->aio_buf
= smb_buf(aio_ex
->outbuf
);
173 a
->aio_nbytes
= smb_maxcnt
;
174 a
->aio_offset
= startpos
;
175 a
->aio_sigevent
.sigev_notify
= SIGEV_SIGNAL
;
176 a
->aio_sigevent
.sigev_signo
= RT_SIGNAL_AIO
;
177 a
->aio_sigevent
.sigev_value
.sival_int
= req
->mid
;
179 ret
= SMB_VFS_AIO_READ(fsp
, a
);
181 DEBUG(0,("schedule_aio_read_and_X: aio_read failed. "
182 "Error %s\n", strerror(errno
) ));
187 outstanding_aio_calls
++;
188 aio_ex
->req
= talloc_move(aio_ex
, &req
);
190 DEBUG(10,("schedule_aio_read_and_X: scheduled aio_read for file %s, "
191 "offset %.0f, len = %u (mid = %u)\n",
192 fsp_str_dbg(fsp
), (double)startpos
, (unsigned int)smb_maxcnt
,
193 (unsigned int)aio_ex
->req
->mid
));
198 /****************************************************************************
199 Set up an aio request from a SMBwriteX call.
200 *****************************************************************************/
202 bool schedule_aio_write_and_X(connection_struct
*conn
,
203 struct smb_request
*req
,
204 files_struct
*fsp
, char *data
,
208 struct aio_extra
*aio_ex
;
211 bool write_through
= BITSETW(req
->vwv
+7,0);
212 size_t min_aio_write_size
= lp_aio_write_size(SNUM(conn
));
215 if (fsp
->base_fsp
!= NULL
) {
216 /* No AIO on streams yet */
217 DEBUG(10, ("AIO on streams not yet supported\n"));
221 if ((!min_aio_write_size
|| (numtowrite
< min_aio_write_size
))
222 && !SMB_VFS_AIO_FORCE(fsp
)) {
223 /* Too small a write for aio request. */
224 DEBUG(10,("schedule_aio_write_and_X: write size (%u) too "
225 "small for minimum aio_write of %u\n",
226 (unsigned int)numtowrite
,
227 (unsigned int)min_aio_write_size
));
231 /* Only do this on non-chained and non-chaining writes not using the write cache. */
233 if (req_is_in_chain(req
) || (lp_write_cache_size(SNUM(conn
)) != 0)) {
237 if (outstanding_aio_calls
>= aio_pending_size
) {
238 DEBUG(3,("schedule_aio_write_and_X: Already have %d aio "
239 "activities outstanding.\n",
240 outstanding_aio_calls
));
241 DEBUG(10,("schedule_aio_write_and_X: failed to schedule "
242 "aio_write for file %s, offset %.0f, len = %u "
244 fsp_str_dbg(fsp
), (double)startpos
,
245 (unsigned int)numtowrite
,
246 (unsigned int)req
->mid
));
250 bufsize
= smb_size
+ 6*2;
252 if (!(aio_ex
= create_aio_extra(fsp
, bufsize
))) {
253 DEBUG(0,("schedule_aio_write_and_X: malloc fail.\n"));
256 aio_ex
->handle_completion
= handle_aio_write_complete
;
258 construct_reply_common_req(req
, aio_ex
->outbuf
);
259 srv_set_message(aio_ex
->outbuf
, 6, 0, True
);
260 SCVAL(aio_ex
->outbuf
,smb_vwv0
,0xFF); /* Never a chained reply. */
264 /* Now set up the aio record for the write call. */
266 a
->aio_fildes
= fsp
->fh
->fd
;
268 a
->aio_nbytes
= numtowrite
;
269 a
->aio_offset
= startpos
;
270 a
->aio_sigevent
.sigev_notify
= SIGEV_SIGNAL
;
271 a
->aio_sigevent
.sigev_signo
= RT_SIGNAL_AIO
;
272 a
->aio_sigevent
.sigev_value
.sival_int
= req
->mid
;
274 ret
= SMB_VFS_AIO_WRITE(fsp
, a
);
276 DEBUG(3,("schedule_aio_wrote_and_X: aio_write failed. "
277 "Error %s\n", strerror(errno
) ));
282 outstanding_aio_calls
++;
283 aio_ex
->req
= talloc_move(aio_ex
, &req
);
285 /* This should actually be improved to span the write. */
286 contend_level2_oplocks_begin(fsp
, LEVEL2_CONTEND_WRITE
);
287 contend_level2_oplocks_end(fsp
, LEVEL2_CONTEND_WRITE
);
289 if (!write_through
&& !lp_syncalways(SNUM(fsp
->conn
))
290 && fsp
->aio_write_behind
) {
291 /* Lie to the client and immediately claim we finished the write. */
293 SSVAL(aio_ex
->outbuf
,smb_vwv2
,numtowrite
);
294 SSVAL(aio_ex
->outbuf
,smb_vwv4
,(numtowrite
>>16)&1);
295 show_msg(aio_ex
->outbuf
);
296 if (!srv_send_smb(smbd_server_fd(),aio_ex
->outbuf
,
297 true, aio_ex
->req
->seqnum
+1,
298 IS_CONN_ENCRYPTED(fsp
->conn
),
299 &aio_ex
->req
->pcd
)) {
300 exit_server_cleanly("handle_aio_write: srv_send_smb "
303 DEBUG(10,("schedule_aio_write_and_X: scheduled aio_write "
304 "behind for file %s\n", fsp_str_dbg(fsp
)));
307 DEBUG(10,("schedule_aio_write_and_X: scheduled aio_write for file "
308 "%s, offset %.0f, len = %u (mid = %u) "
309 "outstanding_aio_calls = %d\n",
310 fsp_str_dbg(fsp
), (double)startpos
, (unsigned int)numtowrite
,
311 (unsigned int)aio_ex
->req
->mid
, outstanding_aio_calls
));
317 /****************************************************************************
318 Complete the read and return the data or error back to the client.
319 Returns errno or zero if all ok.
320 *****************************************************************************/
322 static int handle_aio_read_complete(struct aio_extra
*aio_ex
, int errcode
)
325 char *outbuf
= aio_ex
->outbuf
;
326 char *data
= smb_buf(outbuf
);
327 ssize_t nread
= SMB_VFS_AIO_RETURN(aio_ex
->fsp
,&aio_ex
->acb
);
330 /* We're relying here on the fact that if the fd is
331 closed then the aio will complete and aio_return
332 will return an error. Hopefully this is true. */
335 DEBUG( 3,( "handle_aio_read_complete: file %s nread == %d. "
337 fsp_str_dbg(aio_ex
->fsp
), (int)nread
, strerror(errcode
)));
339 ERROR_NT(map_nt_error_from_unix(errcode
));
340 outsize
= srv_set_message(outbuf
,0,0,true);
342 outsize
= srv_set_message(outbuf
,12,nread
,False
);
343 SSVAL(outbuf
,smb_vwv2
,0xFFFF); /* Remaining - must be * -1. */
344 SSVAL(outbuf
,smb_vwv5
,nread
);
345 SSVAL(outbuf
,smb_vwv6
,smb_offset(data
,outbuf
));
346 SSVAL(outbuf
,smb_vwv7
,((nread
>> 16) & 1));
347 SSVAL(smb_buf(outbuf
),-2,nread
);
349 aio_ex
->fsp
->fh
->pos
= aio_ex
->acb
.aio_offset
+ nread
;
350 aio_ex
->fsp
->fh
->position_information
= aio_ex
->fsp
->fh
->pos
;
352 DEBUG( 3, ( "handle_aio_read_complete file %s max=%d "
354 fsp_str_dbg(aio_ex
->fsp
),
355 (int)aio_ex
->acb
.aio_nbytes
, (int)nread
) );
358 smb_setlen(outbuf
,outsize
- 4);
360 if (!srv_send_smb(smbd_server_fd(),outbuf
,
361 true, aio_ex
->req
->seqnum
+1,
362 IS_CONN_ENCRYPTED(aio_ex
->fsp
->conn
), NULL
)) {
363 exit_server_cleanly("handle_aio_read_complete: srv_send_smb "
367 DEBUG(10,("handle_aio_read_complete: scheduled aio_read completed "
368 "for file %s, offset %.0f, len = %u\n",
369 fsp_str_dbg(aio_ex
->fsp
), (double)aio_ex
->acb
.aio_offset
,
370 (unsigned int)nread
));
375 /****************************************************************************
376 Complete the write and return the data or error back to the client.
377 Returns error code or zero if all ok.
378 *****************************************************************************/
380 static int handle_aio_write_complete(struct aio_extra
*aio_ex
, int errcode
)
382 files_struct
*fsp
= aio_ex
->fsp
;
383 char *outbuf
= aio_ex
->outbuf
;
384 ssize_t numtowrite
= aio_ex
->acb
.aio_nbytes
;
385 ssize_t nwritten
= SMB_VFS_AIO_RETURN(fsp
,&aio_ex
->acb
);
387 if (fsp
->aio_write_behind
) {
388 if (nwritten
!= numtowrite
) {
389 if (nwritten
== -1) {
390 DEBUG(5,("handle_aio_write_complete: "
391 "aio_write_behind failed ! File %s "
392 "is corrupt ! Error %s\n",
393 fsp_str_dbg(fsp
), strerror(errcode
)));
395 DEBUG(0,("handle_aio_write_complete: "
396 "aio_write_behind failed ! File %s "
397 "is corrupt ! Wanted %u bytes but "
398 "only wrote %d\n", fsp_str_dbg(fsp
),
399 (unsigned int)numtowrite
,
404 DEBUG(10,("handle_aio_write_complete: "
405 "aio_write_behind completed for file %s\n",
408 /* TODO: should not return 0 in case of an error !!! */
412 /* We don't need outsize or set_message here as we've already set the
413 fixed size length when we set up the aio call. */
416 DEBUG( 3,( "handle_aio_write: file %s wanted %u bytes. "
417 "nwritten == %d. Error = %s\n",
418 fsp_str_dbg(fsp
), (unsigned int)numtowrite
,
419 (int)nwritten
, strerror(errcode
) ));
421 ERROR_NT(map_nt_error_from_unix(errcode
));
422 srv_set_message(outbuf
,0,0,true);
424 bool write_through
= BITSETW(aio_ex
->req
->vwv
+7,0);
427 SSVAL(outbuf
,smb_vwv2
,nwritten
);
428 SSVAL(outbuf
,smb_vwv4
,(nwritten
>>16)&1);
429 if (nwritten
< (ssize_t
)numtowrite
) {
430 SCVAL(outbuf
,smb_rcls
,ERRHRD
);
431 SSVAL(outbuf
,smb_err
,ERRdiskfull
);
434 DEBUG(3,("handle_aio_write: fnum=%d num=%d wrote=%d\n",
435 fsp
->fnum
, (int)numtowrite
, (int)nwritten
));
436 status
= sync_file(fsp
->conn
,fsp
, write_through
);
437 if (!NT_STATUS_IS_OK(status
)) {
439 ERROR_BOTH(map_nt_error_from_unix(errcode
),
440 ERRHRD
, ERRdiskfull
);
441 srv_set_message(outbuf
,0,0,true);
442 DEBUG(5,("handle_aio_write: sync_file for %s returned %s\n",
443 fsp_str_dbg(fsp
), nt_errstr(status
)));
446 aio_ex
->fsp
->fh
->pos
= aio_ex
->acb
.aio_offset
+ nwritten
;
450 if (!srv_send_smb(smbd_server_fd(),outbuf
,
451 true, aio_ex
->req
->seqnum
+1,
452 IS_CONN_ENCRYPTED(fsp
->conn
),
454 exit_server_cleanly("handle_aio_write: srv_send_smb failed.");
457 DEBUG(10,("handle_aio_write_complete: scheduled aio_write completed "
458 "for file %s, offset %.0f, requested %u, written = %u\n",
459 fsp_str_dbg(fsp
), (double)aio_ex
->acb
.aio_offset
,
460 (unsigned int)numtowrite
, (unsigned int)nwritten
));
465 /****************************************************************************
466 Handle any aio completion. Returns True if finished (and sets *perr if err
467 was non-zero), False if not.
468 *****************************************************************************/
470 static bool handle_aio_completed(struct aio_extra
*aio_ex
, int *perr
)
475 DEBUG(3, ("handle_aio_completed: Non-existing aio_ex passed\n"));
479 /* Ensure the operation has really completed. */
480 err
= SMB_VFS_AIO_ERROR(aio_ex
->fsp
, &aio_ex
->acb
);
481 if (err
== EINPROGRESS
) {
482 DEBUG(10,( "handle_aio_completed: operation mid %u still in "
483 "process for file %s\n",
484 aio_ex
->req
->mid
, fsp_str_dbg(aio_ex
->fsp
)));
486 } else if (err
== ECANCELED
) {
487 /* If error is ECANCELED then don't return anything to the client. */
489 DEBUG(10,( "handle_aio_completed: operation mid %u"
490 " canceled\n", aio_ex
->req
->mid
));
494 err
= aio_ex
->handle_completion(aio_ex
, err
);
496 *perr
= err
; /* Only save non-zero errors. */
502 /****************************************************************************
503 Handle any aio completion inline.
504 *****************************************************************************/
506 void smbd_aio_complete_mid(unsigned int mid
)
508 files_struct
*fsp
= NULL
;
509 struct aio_extra
*aio_ex
= find_aio_ex(mid
);
512 outstanding_aio_calls
--;
514 DEBUG(10,("smbd_aio_complete_mid: mid[%u]\n", mid
));
517 DEBUG(3,("smbd_aio_complete_mid: Can't find record to "
518 "match mid %u.\n", mid
));
524 /* file was closed whilst I/O was outstanding. Just
526 DEBUG( 3,( "smbd_aio_complete_mid: file closed whilst "
527 "aio outstanding (mid[%u]).\n", mid
));
531 if (!handle_aio_completed(aio_ex
, &ret
)) {
538 static void smbd_aio_signal_handler(struct tevent_context
*ev_ctx
,
539 struct tevent_signal
*se
,
540 int signum
, int count
,
541 void *_info
, void *private_data
)
543 siginfo_t
*info
= (siginfo_t
*)_info
;
544 unsigned int mid
= (unsigned int)info
->si_value
.sival_int
;
546 smbd_aio_complete_mid(mid
);
549 /****************************************************************************
550 We're doing write behind and the client closed the file. Wait up to 30
551 seconds (my arbitrary choice) for the aio to complete. Return 0 if all writes
552 completed, errno to return if not.
553 *****************************************************************************/
555 #define SMB_TIME_FOR_AIO_COMPLETE_WAIT 29
557 int wait_for_aio_completion(files_struct
*fsp
)
559 struct aio_extra
*aio_ex
;
560 const SMB_STRUCT_AIOCB
**aiocb_list
;
561 int aio_completion_count
= 0;
562 time_t start_time
= time(NULL
);
565 for (seconds_left
= SMB_TIME_FOR_AIO_COMPLETE_WAIT
;
566 seconds_left
>= 0;) {
571 aio_completion_count
= 0;
572 for( aio_ex
= aio_list_head
; aio_ex
; aio_ex
= aio_ex
->next
) {
573 if (aio_ex
->fsp
== fsp
) {
574 aio_completion_count
++;
578 if (!aio_completion_count
) {
582 DEBUG(3,("wait_for_aio_completion: waiting for %d aio events "
583 "to complete.\n", aio_completion_count
));
585 aiocb_list
= SMB_MALLOC_ARRAY(const SMB_STRUCT_AIOCB
*,
586 aio_completion_count
);
591 for( i
= 0, aio_ex
= aio_list_head
;
593 aio_ex
= aio_ex
->next
) {
594 if (aio_ex
->fsp
== fsp
) {
595 aiocb_list
[i
++] = &aio_ex
->acb
;
599 /* Now wait up to seconds_left for completion. */
600 ts
.tv_sec
= seconds_left
;
603 DEBUG(10,("wait_for_aio_completion: %d events, doing a wait "
605 aio_completion_count
, seconds_left
));
607 err
= SMB_VFS_AIO_SUSPEND(fsp
, aiocb_list
,
608 aio_completion_count
, &ts
);
610 DEBUG(10,("wait_for_aio_completion: returned err = %d, "
611 "errno = %s\n", err
, strerror(errno
) ));
613 if (err
== -1 && errno
== EAGAIN
) {
614 DEBUG(0,("wait_for_aio_completion: aio_suspend timed "
615 "out waiting for %d events after a wait of "
616 "%d seconds\n", aio_completion_count
,
619 cancel_aio_by_fsp(fsp
);
620 SAFE_FREE(aiocb_list
);
624 /* One or more events might have completed - process them if
626 for( i
= 0; i
< aio_completion_count
; i
++) {
627 uint16 mid
= aiocb_list
[i
]->aio_sigevent
.sigev_value
.sival_int
;
629 aio_ex
= find_aio_ex(mid
);
632 DEBUG(0, ("wait_for_aio_completion: mid %u "
633 "doesn't match an aio record\n",
634 (unsigned int)mid
));
638 if (!handle_aio_completed(aio_ex
, &err
)) {
644 SAFE_FREE(aiocb_list
);
645 seconds_left
= SMB_TIME_FOR_AIO_COMPLETE_WAIT
646 - (time(NULL
) - start_time
);
649 /* We timed out - we don't know why. Return ret if already an error,
651 DEBUG(10,("wait_for_aio_completion: aio_suspend timed out waiting "
653 aio_completion_count
));
658 /****************************************************************************
659 Cancel any outstanding aio requests. The client doesn't care about the reply.
660 *****************************************************************************/
662 void cancel_aio_by_fsp(files_struct
*fsp
)
664 struct aio_extra
*aio_ex
;
666 for( aio_ex
= aio_list_head
; aio_ex
; aio_ex
= aio_ex
->next
) {
667 if (aio_ex
->fsp
== fsp
) {
668 /* Don't delete the aio_extra record as we may have
669 completed and don't yet know it. Just do the
670 aio_cancel call and return. */
671 SMB_VFS_AIO_CANCEL(fsp
, &aio_ex
->acb
);
672 aio_ex
->fsp
= NULL
; /* fsp will be closed when we
678 /****************************************************************************
679 Initialize the signal handler for aio read/write.
680 *****************************************************************************/
682 void initialize_async_io_handler(void)
684 aio_signal_event
= tevent_add_signal(smbd_event_context(),
685 smbd_event_context(),
686 RT_SIGNAL_AIO
, SA_SIGINFO
,
687 smbd_aio_signal_handler
,
689 if (!aio_signal_event
) {
690 exit_server("Failed to setup RT_SIGNAL_AIO handler");
693 /* tevent supports 100 signals with SA_SIGINFO */
694 aio_pending_size
= 100;
698 void initialize_async_io_handler(void)
702 bool schedule_aio_read_and_X(connection_struct
*conn
,
703 struct smb_request
*req
,
704 files_struct
*fsp
, SMB_OFF_T startpos
,
710 bool schedule_aio_write_and_X(connection_struct
*conn
,
711 struct smb_request
*req
,
712 files_struct
*fsp
, char *data
,
719 void cancel_aio_by_fsp(files_struct
*fsp
)
723 int wait_for_aio_completion(files_struct
*fsp
)
728 void smbd_aio_complete_mid(unsigned int mid
);