vfs_gpfs: Move call to load GPFS library
[Samba.git] / source3 / smbd / smb2_aio.c
blob8c01c76a3e94341cb68fda79622a6a96b3fc609b
1 /*
2 Unix SMB/Netbios implementation.
3 Version 3.0
4 async_io read handling using POSIX async io.
5 Copyright (C) Jeremy Allison 2005.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "includes.h"
22 #include "smbd/smbd.h"
23 #include "smbd/globals.h"
24 #include "../lib/util/tevent_ntstatus.h"
25 #include "../lib/util/tevent_unix.h"
27 /****************************************************************************
28 Accessor function to return write_through state.
29 *****************************************************************************/
31 bool aio_write_through_requested(struct aio_extra *aio_ex)
33 return aio_ex->write_through;
36 /****************************************************************************
37 Create the extended aio struct we must keep around for the lifetime
38 of the aio call.
39 *****************************************************************************/
41 struct aio_extra *create_aio_extra(TALLOC_CTX *mem_ctx,
42 files_struct *fsp,
43 size_t buflen)
45 struct aio_extra *aio_ex = talloc_zero(mem_ctx, struct aio_extra);
47 if (!aio_ex) {
48 return NULL;
51 /* The output buffer stored in the aio_ex is the start of
52 the smb return buffer. The buffer used in the acb
53 is the start of the reply data portion of that buffer. */
55 if (buflen) {
56 aio_ex->outbuf = data_blob_talloc(aio_ex, NULL, buflen);
57 if (!aio_ex->outbuf.data) {
58 TALLOC_FREE(aio_ex);
59 return NULL;
62 aio_ex->fsp = fsp;
63 return aio_ex;
66 struct aio_req_fsp_link {
67 files_struct *fsp;
68 struct tevent_req *req;
71 static int aio_del_req_from_fsp(struct aio_req_fsp_link *lnk)
73 unsigned i;
74 files_struct *fsp = lnk->fsp;
75 struct tevent_req *req = lnk->req;
77 for (i=0; i<fsp->num_aio_requests; i++) {
78 if (fsp->aio_requests[i] == req) {
79 break;
82 if (i == fsp->num_aio_requests) {
83 DEBUG(1, ("req %p not found in fsp %p\n", req, fsp));
84 return 0;
86 fsp->num_aio_requests -= 1;
87 fsp->aio_requests[i] = fsp->aio_requests[fsp->num_aio_requests];
89 if (fsp->num_aio_requests == 0) {
90 TALLOC_FREE(fsp->aio_requests);
92 return 0;
95 bool aio_add_req_to_fsp(files_struct *fsp, struct tevent_req *req)
97 size_t array_len;
98 struct aio_req_fsp_link *lnk;
100 lnk = talloc(req, struct aio_req_fsp_link);
101 if (lnk == NULL) {
102 return false;
105 array_len = talloc_array_length(fsp->aio_requests);
106 if (array_len <= fsp->num_aio_requests) {
107 struct tevent_req **tmp;
109 if (fsp->num_aio_requests + 10 < 10) {
110 /* Integer wrap. */
111 TALLOC_FREE(lnk);
112 return false;
116 * Allocate in blocks of 10 so we don't allocate
117 * on every aio request.
119 tmp = talloc_realloc(
120 fsp, fsp->aio_requests, struct tevent_req *,
121 fsp->num_aio_requests+10);
122 if (tmp == NULL) {
123 TALLOC_FREE(lnk);
124 return false;
126 fsp->aio_requests = tmp;
128 fsp->aio_requests[fsp->num_aio_requests] = req;
129 fsp->num_aio_requests += 1;
131 lnk->fsp = fsp;
132 lnk->req = req;
133 talloc_set_destructor(lnk, aio_del_req_from_fsp);
135 return true;
138 struct pwrite_fsync_state {
139 struct tevent_context *ev;
140 files_struct *fsp;
141 bool write_through;
142 ssize_t nwritten;
145 static void pwrite_fsync_write_done(struct tevent_req *subreq);
146 static void pwrite_fsync_sync_done(struct tevent_req *subreq);
148 struct tevent_req *pwrite_fsync_send(TALLOC_CTX *mem_ctx,
149 struct tevent_context *ev,
150 struct files_struct *fsp,
151 const void *data,
152 size_t n, off_t offset,
153 bool write_through)
155 struct tevent_req *req, *subreq;
156 struct pwrite_fsync_state *state;
157 bool ok;
159 req = tevent_req_create(mem_ctx, &state, struct pwrite_fsync_state);
160 if (req == NULL) {
161 return NULL;
163 state->ev = ev;
164 state->fsp = fsp;
165 state->write_through = write_through;
167 ok = vfs_valid_pwrite_range(offset, n);
168 if (!ok) {
169 tevent_req_error(req, EINVAL);
170 return tevent_req_post(req, ev);
173 if (n == 0) {
174 tevent_req_done(req);
175 return tevent_req_post(req, ev);
178 subreq = SMB_VFS_PWRITE_SEND(state, ev, fsp, data, n, offset);
179 if (tevent_req_nomem(subreq, req)) {
180 return tevent_req_post(req, ev);
182 tevent_req_set_callback(subreq, pwrite_fsync_write_done, req);
183 return req;
186 static void pwrite_fsync_write_done(struct tevent_req *subreq)
188 struct tevent_req *req = tevent_req_callback_data(
189 subreq, struct tevent_req);
190 struct pwrite_fsync_state *state = tevent_req_data(
191 req, struct pwrite_fsync_state);
192 connection_struct *conn = state->fsp->conn;
193 bool do_sync;
194 struct vfs_aio_state vfs_aio_state;
196 state->nwritten = SMB_VFS_PWRITE_RECV(subreq, &vfs_aio_state);
197 TALLOC_FREE(subreq);
198 if (state->nwritten == -1) {
199 tevent_req_error(req, vfs_aio_state.error);
200 return;
203 do_sync = (lp_strict_sync(SNUM(conn)) &&
204 (lp_sync_always(SNUM(conn)) || state->write_through));
205 if (!do_sync) {
206 tevent_req_done(req);
207 return;
210 subreq = SMB_VFS_FSYNC_SEND(state, state->ev, state->fsp);
211 if (tevent_req_nomem(subreq, req)) {
212 return;
214 tevent_req_set_callback(subreq, pwrite_fsync_sync_done, req);
217 static void pwrite_fsync_sync_done(struct tevent_req *subreq)
219 struct tevent_req *req = tevent_req_callback_data(
220 subreq, struct tevent_req);
221 int ret;
222 struct vfs_aio_state vfs_aio_state;
224 ret = SMB_VFS_FSYNC_RECV(subreq, &vfs_aio_state);
225 TALLOC_FREE(subreq);
226 if (ret == -1) {
227 tevent_req_error(req, vfs_aio_state.error);
228 return;
230 tevent_req_done(req);
233 ssize_t pwrite_fsync_recv(struct tevent_req *req, int *perr)
235 struct pwrite_fsync_state *state = tevent_req_data(
236 req, struct pwrite_fsync_state);
238 if (tevent_req_is_unix_error(req, perr)) {
239 return -1;
241 return state->nwritten;
244 bool cancel_smb2_aio(struct smb_request *smbreq)
246 struct smbd_smb2_request *smb2req = smbreq->smb2req;
247 struct aio_extra *aio_ex = NULL;
249 if (smb2req) {
250 aio_ex = talloc_get_type(smbreq->async_priv,
251 struct aio_extra);
254 if (aio_ex == NULL) {
255 return false;
258 if (aio_ex->fsp == NULL) {
259 return false;
263 * We let the aio request run and don't try to cancel it which means
264 * processing of the SMB2 request must continue as normal, cf MS-SMB2
265 * 3.3.5.16:
267 * If the target request is not successfully canceled, processing of
268 * the target request MUST continue and no response is sent to the
269 * cancel request.
272 return false;
275 static void aio_pread_smb2_done(struct tevent_req *req);
277 /****************************************************************************
278 Set up an aio request from a SMB2 read call.
279 *****************************************************************************/
281 NTSTATUS schedule_smb2_aio_read(connection_struct *conn,
282 struct smb_request *smbreq,
283 files_struct *fsp,
284 TALLOC_CTX *ctx,
285 DATA_BLOB *preadbuf,
286 off_t startpos,
287 size_t smb_maxcnt)
289 struct aio_extra *aio_ex;
290 size_t min_aio_read_size = lp_aio_read_size(SNUM(conn));
291 struct tevent_req *req;
292 bool is_compound = false;
293 bool is_last_in_compound = false;
294 bool ok;
296 ok = vfs_valid_pread_range(startpos, smb_maxcnt);
297 if (!ok) {
298 return NT_STATUS_INVALID_PARAMETER;
301 if (fsp_is_alternate_stream(fsp)) {
302 DEBUG(10, ("AIO on streams not yet supported\n"));
303 return NT_STATUS_RETRY;
306 if (fsp->op == NULL) {
307 /* No AIO on internal opens. */
308 return NT_STATUS_RETRY;
311 if ((!min_aio_read_size || (smb_maxcnt < min_aio_read_size))
312 && !SMB_VFS_AIO_FORCE(fsp)) {
313 /* Too small a read for aio request. */
314 DEBUG(10,("smb2: read size (%u) too small "
315 "for minimum aio_read of %u\n",
316 (unsigned int)smb_maxcnt,
317 (unsigned int)min_aio_read_size ));
318 return NT_STATUS_RETRY;
321 is_compound = smbd_smb2_is_compound(smbreq->smb2req);
322 is_last_in_compound = smbd_smb2_is_last_in_compound(smbreq->smb2req);
324 if (is_compound && !is_last_in_compound) {
326 * Only allow going async if this is the last
327 * request in a compound.
329 return NT_STATUS_RETRY;
332 /* Create the out buffer. */
333 *preadbuf = data_blob_talloc(ctx, NULL, smb_maxcnt);
334 if (preadbuf->data == NULL) {
335 return NT_STATUS_NO_MEMORY;
338 if (!(aio_ex = create_aio_extra(smbreq->smb2req, fsp, 0))) {
339 return NT_STATUS_NO_MEMORY;
342 init_strict_lock_struct(fsp,
343 fsp->op->global->open_persistent_id,
344 (uint64_t)startpos,
345 (uint64_t)smb_maxcnt,
346 READ_LOCK,
347 lp_posix_cifsu_locktype(fsp),
348 &aio_ex->lock);
350 /* Take the lock until the AIO completes. */
351 if (!SMB_VFS_STRICT_LOCK_CHECK(conn, fsp, &aio_ex->lock)) {
352 TALLOC_FREE(aio_ex);
353 return NT_STATUS_FILE_LOCK_CONFLICT;
356 aio_ex->nbyte = smb_maxcnt;
357 aio_ex->offset = startpos;
359 req = SMB_VFS_PREAD_SEND(aio_ex, fsp->conn->sconn->ev_ctx, fsp,
360 preadbuf->data, smb_maxcnt, startpos);
361 if (req == NULL) {
362 DEBUG(0, ("smb2: SMB_VFS_PREAD_SEND failed. "
363 "Error %s\n", strerror(errno)));
364 TALLOC_FREE(aio_ex);
365 return NT_STATUS_RETRY;
367 tevent_req_set_callback(req, aio_pread_smb2_done, aio_ex);
369 if (!aio_add_req_to_fsp(fsp, req)) {
370 DEBUG(1, ("Could not add req to fsp\n"));
371 TALLOC_FREE(aio_ex);
372 return NT_STATUS_RETRY;
375 /* We don't need talloc_move here as both aio_ex and
376 * smbreq are children of smbreq->smb2req. */
377 aio_ex->smbreq = smbreq;
378 smbreq->async_priv = aio_ex;
380 DEBUG(10,("smb2: scheduled aio_read for file %s, "
381 "offset %.0f, len = %u (mid = %u)\n",
382 fsp_str_dbg(fsp), (double)startpos, (unsigned int)smb_maxcnt,
383 (unsigned int)aio_ex->smbreq->mid ));
385 return NT_STATUS_OK;
388 static void aio_pread_smb2_done(struct tevent_req *req)
390 struct aio_extra *aio_ex = tevent_req_callback_data(
391 req, struct aio_extra);
392 struct tevent_req *subreq = aio_ex->smbreq->smb2req->subreq;
393 files_struct *fsp = aio_ex->fsp;
394 NTSTATUS status;
395 ssize_t nread;
396 struct vfs_aio_state vfs_aio_state = { 0 };
398 nread = SMB_VFS_PREAD_RECV(req, &vfs_aio_state);
399 TALLOC_FREE(req);
401 DEBUG(10, ("pread_recv returned %d, err = %s\n", (int)nread,
402 (nread == -1) ? strerror(vfs_aio_state.error) : "no error"));
404 /* Common error or success code processing for async or sync
405 read returns. */
407 status = smb2_read_complete(subreq, nread, vfs_aio_state.error);
409 if (nread > 0) {
410 fh_set_pos(fsp->fh, aio_ex->offset + nread);
411 fh_set_position_information(fsp->fh,
412 fh_get_pos(fsp->fh));
415 DEBUG(10, ("smb2: scheduled aio_read completed "
416 "for file %s, offset %.0f, len = %u "
417 "(errcode = %d, NTSTATUS = %s)\n",
418 fsp_str_dbg(aio_ex->fsp),
419 (double)aio_ex->offset,
420 (unsigned int)nread,
421 vfs_aio_state.error, nt_errstr(status)));
423 if (tevent_req_nterror(subreq, status)) {
424 return;
426 tevent_req_done(subreq);
429 static void aio_pwrite_smb2_done(struct tevent_req *req);
431 /****************************************************************************
432 Set up an aio request from a SMB2write call.
433 *****************************************************************************/
435 NTSTATUS schedule_aio_smb2_write(connection_struct *conn,
436 struct smb_request *smbreq,
437 files_struct *fsp,
438 uint64_t in_offset,
439 DATA_BLOB in_data,
440 bool write_through)
442 struct aio_extra *aio_ex = NULL;
443 size_t min_aio_write_size = lp_aio_write_size(SNUM(conn));
444 struct tevent_req *req;
445 bool is_compound = false;
446 bool is_last_in_compound = false;
448 if (fsp_is_alternate_stream(fsp)) {
449 /* No AIO on streams yet */
450 DEBUG(10, ("AIO on streams not yet supported\n"));
451 return NT_STATUS_RETRY;
454 if (fsp->op == NULL) {
455 /* No AIO on internal opens. */
456 return NT_STATUS_RETRY;
459 if ((!min_aio_write_size || (in_data.length < min_aio_write_size))
460 && !SMB_VFS_AIO_FORCE(fsp)) {
461 /* Too small a write for aio request. */
462 DEBUG(10,("smb2: write size (%u) too "
463 "small for minimum aio_write of %u\n",
464 (unsigned int)in_data.length,
465 (unsigned int)min_aio_write_size ));
466 return NT_STATUS_RETRY;
469 is_compound = smbd_smb2_is_compound(smbreq->smb2req);
470 is_last_in_compound = smbd_smb2_is_last_in_compound(smbreq->smb2req);
472 if (is_compound && !is_last_in_compound) {
474 * Only allow going async if this is the last
475 * request in a compound.
477 return NT_STATUS_RETRY;
480 if (smbreq->unread_bytes) {
481 /* Can't do async with recvfile. */
482 return NT_STATUS_RETRY;
485 if (!(aio_ex = create_aio_extra(smbreq->smb2req, fsp, 0))) {
486 return NT_STATUS_NO_MEMORY;
489 aio_ex->write_through = write_through;
491 init_strict_lock_struct(fsp,
492 fsp->op->global->open_persistent_id,
493 in_offset,
494 (uint64_t)in_data.length,
495 WRITE_LOCK,
496 lp_posix_cifsu_locktype(fsp),
497 &aio_ex->lock);
499 /* Take the lock until the AIO completes. */
500 if (!SMB_VFS_STRICT_LOCK_CHECK(conn, fsp, &aio_ex->lock)) {
501 TALLOC_FREE(aio_ex);
502 return NT_STATUS_FILE_LOCK_CONFLICT;
505 aio_ex->nbyte = in_data.length;
506 aio_ex->offset = in_offset;
508 req = pwrite_fsync_send(aio_ex, fsp->conn->sconn->ev_ctx, fsp,
509 in_data.data, in_data.length, in_offset,
510 write_through);
511 if (req == NULL) {
512 DEBUG(3, ("smb2: SMB_VFS_PWRITE_SEND failed. "
513 "Error %s\n", strerror(errno)));
514 TALLOC_FREE(aio_ex);
515 return NT_STATUS_RETRY;
517 tevent_req_set_callback(req, aio_pwrite_smb2_done, aio_ex);
519 if (!aio_add_req_to_fsp(fsp, req)) {
520 DEBUG(1, ("Could not add req to fsp\n"));
521 TALLOC_FREE(aio_ex);
522 return NT_STATUS_RETRY;
525 /* We don't need talloc_move here as both aio_ex and
526 * smbreq are children of smbreq->smb2req. */
527 aio_ex->smbreq = smbreq;
528 smbreq->async_priv = aio_ex;
530 /* This should actually be improved to span the write. */
531 contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WRITE);
532 contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WRITE);
535 * We don't want to do write behind due to ownership
536 * issues of the request structs. Maybe add it if I
537 * figure those out. JRA.
540 DEBUG(10,("smb2: scheduled aio_write for file "
541 "%s, offset %.0f, len = %u (mid = %u)\n",
542 fsp_str_dbg(fsp),
543 (double)in_offset,
544 (unsigned int)in_data.length,
545 (unsigned int)aio_ex->smbreq->mid));
547 return NT_STATUS_OK;
550 static void aio_pwrite_smb2_done(struct tevent_req *req)
552 struct aio_extra *aio_ex = tevent_req_callback_data(
553 req, struct aio_extra);
554 ssize_t numtowrite = aio_ex->nbyte;
555 struct tevent_req *subreq = aio_ex->smbreq->smb2req->subreq;
556 files_struct *fsp = aio_ex->fsp;
557 NTSTATUS status;
558 ssize_t nwritten;
559 int err = 0;
561 nwritten = pwrite_fsync_recv(req, &err);
562 TALLOC_FREE(req);
564 DEBUG(10, ("pwrite_recv returned %d, err = %s\n", (int)nwritten,
565 (nwritten == -1) ? strerror(err) : "no error"));
567 mark_file_modified(fsp);
569 status = smb2_write_complete_nosync(subreq, nwritten, err);
571 DEBUG(10, ("smb2: scheduled aio_write completed "
572 "for file %s, offset %.0f, requested %u, "
573 "written = %u (errcode = %d, NTSTATUS = %s)\n",
574 fsp_str_dbg(fsp),
575 (double)aio_ex->offset,
576 (unsigned int)numtowrite,
577 (unsigned int)nwritten,
578 err, nt_errstr(status)));
580 if (tevent_req_nterror(subreq, status)) {
581 return;
583 tevent_req_done(subreq);