Fix headers, ldb_includes.h is a private header,
[Samba/bjacke.git] / source3 / modules / onefs_dir.c
blob83622b2bcdabb848e32f452152edfefbabfe139d
1 /*
2 * Unix SMB/CIFS implementation.
4 * Support for OneFS bulk directory enumeration API
6 * Copyright (C) Steven Danneman, 2009
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 3 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, see <http://www.gnu.org/licenses/>.
22 #include "onefs.h"
24 #include <ifs/ifs_syscalls.h>
26 /* The OneFS filesystem provides a readdirplus() syscall, equivalent to the
27 * NFSv3 PDU, which retrieves bulk directory listings with stat information
28 * in a single syscall.
30 * This file hides this bulk interface underneath Samba's very POSIX like
31 * opendir/readdir/telldir VFS interface. This is done to provide a
32 * significant performance improvement when listing the contents of large
33 * directories, which also require file meta information, i.e. a typical
34 * Windows Explorer request.
37 #define RDP_RESUME_KEY_START 0x1
39 #define RDP_BATCH_SIZE 128
40 #define RDP_DIRENTRIES_SIZE ((size_t)(RDP_BATCH_SIZE * sizeof(struct dirent)))
42 static char *rdp_direntries = NULL;
43 static SMB_STRUCT_STAT *rdp_stats = NULL;
44 static uint64_t *rdp_cookies = NULL;
46 struct rdp_dir_state {
47 struct rdp_dir_state *next, *prev;
48 SMB_STRUCT_DIR *dirp;
49 char *direntries_cursor; /* cursor to current direntry in the cache */
50 size_t stat_count; /* number of entries stored in the cache */
51 size_t stat_cursor; /* cursor to current stat in the cache */
52 uint64_t resume_cookie; /* last cookie returned from the cache */
53 long location; /* absolute location of direnty in DIR */
56 static struct rdp_dir_state *dirstatelist = NULL;
58 SMB_STRUCT_DIR *rdp_last_dirp = NULL;
60 /**
61 * Given a DIR pointer, return our internal state.
63 * This function also tells us whether the given DIR is the same as we saw
64 * during the last call. Because we use a single globally allocated buffer
65 * for readdirplus entries we must check every call into this API to see if
66 * it's for the same directory listing, or a new one. If it's the same we can
67 * maintain our current cached entries, otherwise we must go to the kernel.
69 * @return 0 on success, 1 on failure
71 static int
72 rdp_retrieve_dir_state(SMB_STRUCT_DIR *dirp, struct rdp_dir_state **dir_state,
73 bool *same_as_last)
75 struct rdp_dir_state *dsp;
77 /* Is this directory the same as the last call */
78 *same_as_last = (dirp == rdp_last_dirp);
80 for(dsp = dirstatelist; dsp; dsp = dsp->next)
81 if (dsp->dirp == dirp) {
82 *dir_state = dsp;
83 return 0;
86 /* Couldn't find existing dir_state for the given directory
87 * pointer. */
88 return 1;
91 /**
92 * Initialize the global readdirplus buffers.
94 * These same buffers are used for all calls into readdirplus.
96 * @return 0 on success, errno value on failure
98 static int
99 rdp_init(struct rdp_dir_state *dsp)
101 /* Unfortunately, there is no good way to free these buffers. If we
102 * allocated and freed for every DIR handle performance would be
103 * adversely affected. For now these buffers will be leaked and only
104 * freed when the smbd process dies. */
105 if (!rdp_direntries) {
106 rdp_direntries = SMB_MALLOC(RDP_DIRENTRIES_SIZE);
107 if (!rdp_direntries)
108 return ENOMEM;
111 if (!rdp_stats) {
112 rdp_stats =
113 SMB_MALLOC(RDP_BATCH_SIZE * sizeof(SMB_STRUCT_STAT));
114 if (!rdp_stats)
115 return ENOMEM;
118 if (!rdp_cookies) {
119 rdp_cookies = SMB_MALLOC(RDP_BATCH_SIZE * sizeof(uint64_t));
120 if (!rdp_cookies)
121 return ENOMEM;
124 dsp->direntries_cursor = rdp_direntries + RDP_DIRENTRIES_SIZE;
125 dsp->stat_count = RDP_BATCH_SIZE;
126 dsp->stat_cursor = RDP_BATCH_SIZE;
127 dsp->resume_cookie = RDP_RESUME_KEY_START;
128 dsp->location = 0;
130 return 0;
134 * Call into readdirplus() to refill our global dirent cache.
136 * This function also resets all cursors back to the beginning of the cache.
137 * All stat buffers are retrieved by following symlinks.
139 * @return number of entries retrieved, -1 on error
141 static int
142 rdp_fill_cache(struct rdp_dir_state *dsp)
144 int nread, dirfd;
146 dirfd = dirfd(dsp->dirp);
147 if (dirfd < 0) {
148 DEBUG(1, ("Could not retrieve fd for DIR\n"));
149 return -1;
152 /* Resize the stat_count to grab as many entries as possible */
153 dsp->stat_count = RDP_BATCH_SIZE;
155 DEBUG(9, ("Calling readdirplus() with DIR %p, dirfd: %d, "
156 "resume_cookie 0x%llx, location %u, size_to_read: %zu, "
157 "direntries_size: %zu, stat_count: %u\n",
158 dsp->dirp, dirfd, dsp->resume_cookie, dsp->location,
159 RDP_BATCH_SIZE, RDP_DIRENTRIES_SIZE, dsp->stat_count));
161 nread = readdirplus(dirfd,
162 RDP_FOLLOW,
163 &dsp->resume_cookie,
164 RDP_BATCH_SIZE,
165 rdp_direntries,
166 RDP_DIRENTRIES_SIZE,
167 &dsp->stat_count,
168 rdp_stats,
169 rdp_cookies);
170 if (nread < 0) {
171 DEBUG(1, ("Error calling readdirplus(): %s\n",
172 strerror(errno)));
173 return -1;
176 DEBUG(9, ("readdirplus() returned %u entries from DIR %p\n",
177 dsp->stat_count, dsp->dirp));
179 dsp->direntries_cursor = rdp_direntries;
180 dsp->stat_cursor = 0;
182 return nread;
186 * Create a dir_state to track an open directory that we're enumerating.
188 * This utility function is globally accessible for use by other parts of the
189 * onefs.so module to initialize a dir_state when a directory is opened through
190 * a path other than the VFS layer.
192 * @return 0 on success and errno on failure
194 * @note: Callers of this function MUST cleanup the dir_state through a proper
195 * call to VFS_CLOSEDIR().
198 onefs_rdp_add_dir_state(connection_struct *conn, SMB_STRUCT_DIR *dirp)
200 int ret = 0;
201 struct rdp_dir_state *dsp = NULL;
203 /* No-op if readdirplus is disabled */
204 if (!lp_parm_bool(SNUM(conn), PARM_ONEFS_TYPE,
205 PARM_USE_READDIRPLUS, PARM_USE_READDIRPLUS_DEFAULT))
207 return 0;
210 /* Create a struct dir_state */
211 dsp = SMB_MALLOC_P(struct rdp_dir_state);
212 if (!dsp) {
213 DEBUG(0, ("Error allocating struct rdp_dir_state.\n"));
214 return ENOMEM;
217 /* Initialize the dir_state structure and add it to the list */
218 ret = rdp_init(dsp);
219 if (ret) {
220 DEBUG(0, ("Error initializing readdirplus() buffers: %s\n",
221 strerror(ret)));
222 return ret;
225 /* Set the SMB_STRUCT_DIR in the dsp */
226 dsp->dirp = dirp;
228 DLIST_ADD(dirstatelist, dsp);
230 return 0;
234 * Open a directory for enumeration.
236 * Create a state struct to track the state of this directory for the life
237 * of this open.
239 * @param[in] handle vfs handle given in most VFS calls
240 * @param[in] fname filename of the directory to open
241 * @param[in] mask unused
242 * @param[in] attr unused
244 * @return DIR pointer, NULL if directory does not exist, NULL on error
246 SMB_STRUCT_DIR *
247 onefs_opendir(vfs_handle_struct *handle, const char *fname, const char *mask,
248 uint32 attr)
250 int ret = 0;
251 SMB_STRUCT_DIR *ret_dirp;
253 /* Fallback to default system routines if readdirplus is disabled */
254 if (!lp_parm_bool(SNUM(handle->conn), PARM_ONEFS_TYPE,
255 PARM_USE_READDIRPLUS, PARM_USE_READDIRPLUS_DEFAULT))
257 return SMB_VFS_NEXT_OPENDIR(handle, fname, mask, attr);
260 /* Open the directory */
261 ret_dirp = SMB_VFS_NEXT_OPENDIR(handle, fname, mask, attr);
262 if (!ret_dirp) {
263 DEBUG(3, ("Unable to open directory: %s\n", fname));
264 return NULL;
267 /* Create the dir_state struct and add it to the list */
268 ret = onefs_rdp_add_dir_state(handle->conn, ret_dirp);
269 if (ret) {
270 DEBUG(0, ("Error adding dir_state to the list\n"));
271 return NULL;
274 DEBUG(9, ("Opened handle on directory: \"%s\", DIR %p\n",
275 fname, ret_dirp));
277 return ret_dirp;
281 * Retrieve one direntry and optional stat buffer from our readdir cache.
283 * Increment the internal resume cookie, and refresh the cache from the
284 * kernel if necessary.
286 * @param[in] handle vfs handle given in most VFS calls
287 * @param[in] dirp system DIR handle to retrieve direntries from
288 * @param[in/out] sbuf optional stat buffer to fill, this can be NULL
290 * @return dirent structure, NULL if at the end of the directory, NULL on error
292 SMB_STRUCT_DIRENT *
293 onefs_readdir(vfs_handle_struct *handle, SMB_STRUCT_DIR *dirp,
294 SMB_STRUCT_STAT *sbuf)
296 struct rdp_dir_state *dsp = NULL;
297 SMB_STRUCT_DIRENT *ret_direntp;
298 bool same_as_last;
299 int ret = -1;
301 /* Set stat invalid in-case we error out */
302 if (sbuf)
303 SET_STAT_INVALID(*sbuf);
305 /* Fallback to default system routines if readdirplus is disabled */
306 if (!lp_parm_bool(SNUM(handle->conn), PARM_ONEFS_TYPE,
307 PARM_USE_READDIRPLUS, PARM_USE_READDIRPLUS_DEFAULT))
309 return sys_readdir(dirp);
312 /* Retrieve state based off DIR handle */
313 ret = rdp_retrieve_dir_state(dirp, &dsp, &same_as_last);
314 if (ret) {
315 DEBUG(1, ("Could not retrieve dir_state struct for "
316 "SMB_STRUCT_DIR pointer.\n"));
317 ret_direntp = NULL;
318 goto end;
321 /* DIR is the same, current buffer and cursors are valid.
322 * Grab the next direntry from our cache. */
323 if (same_as_last) {
324 if ((dsp->direntries_cursor >=
325 rdp_direntries + RDP_DIRENTRIES_SIZE) ||
326 (dsp->stat_cursor == dsp->stat_count))
328 /* Cache is empty, refill from kernel */
329 ret = rdp_fill_cache(dsp);
330 if (ret <= 0) {
331 ret_direntp = NULL;
332 goto end;
335 } else {
336 /* DIR is different from last call, reset all buffers and
337 * cursors, and refill the global cache from the new DIR */
338 ret = rdp_fill_cache(dsp);
339 if (ret <= 0) {
340 ret_direntp = NULL;
341 goto end;
343 DEBUG(8, ("Switched global rdp cache to new DIR entry.\n"));
346 /* Return next entry from cache */
347 ret_direntp = ((SMB_STRUCT_DIRENT *)dsp->direntries_cursor);
348 dsp->direntries_cursor +=
349 ((SMB_STRUCT_DIRENT *)dsp->direntries_cursor)->d_reclen;
350 if (sbuf) {
351 *sbuf = rdp_stats[dsp->stat_cursor];
352 /* readdirplus() sets st_ino field to 0, if it was
353 * unable to retrieve stat information for that
354 * particular directory entry. */
355 if (sbuf->st_ino == 0)
356 SET_STAT_INVALID(*sbuf);
359 DEBUG(9, ("Read from DIR %p, direntry: \"%s\", location: %ld, "
360 "resume cookie: 0x%llx, cache cursor: %zu, cache count: %zu\n",
361 dsp->dirp, ret_direntp->d_name, dsp->location,
362 dsp->resume_cookie, dsp->stat_cursor, dsp->stat_count));
364 dsp->resume_cookie = rdp_cookies[dsp->stat_cursor];
365 dsp->stat_cursor++;
366 dsp->location++;
368 /* FALLTHROUGH */
369 end:
370 /* Set rdp_last_dirp at the end of every VFS call where the cache was
371 * reloaded */
372 rdp_last_dirp = dirp;
373 return ret_direntp;
377 * Set the location of the next direntry to be read via onefs_readdir().
379 * This function should only pass in locations retrieved from onefs_telldir().
381 * Ideally the seek point will still be in the readdirplus cache, and we'll
382 * just update our cursors. If the seek location is outside of the current
383 * cache we must do an expensive re-enumeration of the entire directory up
384 * to the offset.
386 * @param[in] handle vfs handle given in most VFS calls
387 * @param[in] dirp system DIR handle to set offset on
388 * @param[in] offset from the start of the directory where the next read
389 * will take place
391 * @return no return value
393 void
394 onefs_seekdir(vfs_handle_struct *handle, SMB_STRUCT_DIR *dirp, long offset)
396 struct rdp_dir_state *dsp = NULL;
397 bool same_as_last;
398 bool outside_cache = false;
399 int ret = -1, i;
401 /* Fallback to default system routines if readdirplus is disabled */
402 if (!lp_parm_bool(SNUM(handle->conn), PARM_ONEFS_TYPE,
403 PARM_USE_READDIRPLUS, PARM_USE_READDIRPLUS_DEFAULT))
405 return sys_seekdir(dirp, offset);
408 /* Validate inputs */
409 if (offset < 0) {
410 DEBUG(1, ("Invalid offset %ld passed.\n", offset));
411 return;
414 /* Retrieve state based off DIR handle */
415 ret = rdp_retrieve_dir_state(dirp, &dsp, &same_as_last);
416 if (ret) {
417 DEBUG(1, ("Could not retrieve dir_state struct for "
418 "SMB_STRUCT_DIR pointer.\n"));
419 /* XXX: we can't return an error, should we ABORT rather than
420 * return without actually seeking? */
421 return;
424 /* Short cut if no work needs to be done */
425 if (offset == dsp->location)
426 return;
428 /* If DIR is different from last call, reset all buffers and cursors,
429 * and refill the global cache from the new DIR */
430 if (!same_as_last) {
431 ret = rdp_fill_cache(dsp);
432 if (ret <= 0)
433 goto out;
434 DEBUG(8, ("Switched global rdp cache to new DIR entry.\n"));
437 /* Check if location is outside the currently cached entries */
438 if (offset < dsp->location - dsp->stat_cursor) {
439 /* offset is before the current cache */
440 /* reset to the beginning of the directory */
441 ret = rdp_init(dsp);
442 if (ret) {
443 DEBUG(0, ("Error initializing readdirplus() buffers: "
444 "%s\n", strerror(ret)));
445 goto out;
447 outside_cache = true;
448 } else if (offset >
449 dsp->location + (dsp->stat_count - 1 - dsp->stat_cursor))
451 /* offset is after the current cache
452 * advance the cookie to the end of the cache */
453 dsp->resume_cookie = rdp_cookies[dsp->stat_count - 1];
454 outside_cache = true;
457 if (outside_cache) {
458 /* start reading from the directory, until we have the
459 * specified offset in our cache */
460 do {
461 dsp->location += dsp->stat_count - dsp->stat_cursor;
462 ret = rdp_fill_cache(dsp);
463 if (ret <= 0) {
464 DEBUG(1, ("Error seeking to offset outside the "
465 "cached directory entries. Offset "
466 "%ld \n", dsp->location));
467 goto out;
469 dsp->resume_cookie = rdp_cookies[dsp->stat_count - 1];
470 } while (offset >= dsp->location + dsp->stat_count);
473 /* Location should be within the currently cached entries */
474 if (offset < dsp->location &&
475 offset >= dsp->location - dsp->stat_cursor)
477 /* offset is within the current cache, before the cursor.
478 * update cursors to the new location */
479 int new_cursor = dsp->stat_cursor - (dsp->location - offset);
481 dsp->direntries_cursor = rdp_direntries;
482 for (i=0; i < new_cursor; i++) {
483 dsp->direntries_cursor +=
484 ((SMB_STRUCT_DIRENT *)
485 dsp->direntries_cursor)->d_reclen;
487 dsp->stat_cursor = new_cursor;
488 dsp->resume_cookie = rdp_cookies[dsp->stat_cursor];
489 dsp->location = offset;
490 } else if (offset >= dsp->location &&
491 offset <= dsp->location + (dsp->stat_count - 1 - dsp->stat_cursor))
493 /* offset is within the current cache, at or after the cursor.
494 * update cursors to the new location */
495 int add_to_cursor = offset - dsp->location - 1;
497 for (i=0; i < add_to_cursor; i++) {
498 dsp->direntries_cursor +=
499 ((SMB_STRUCT_DIRENT *)
500 dsp->direntries_cursor)->d_reclen;
502 dsp->stat_cursor += add_to_cursor;
503 dsp->resume_cookie = rdp_cookies[dsp->stat_cursor];
504 dsp->location = offset;
507 DEBUG(9, ("Seek DIR %p, location: %ld, cache cursor: %zu\n",
508 dsp->dirp, dsp->location, dsp->stat_cursor));
510 /* FALLTHROUGH */
511 out:
512 /* Set rdp_last_dirp at the end of every VFS call where the cache was
513 * reloaded */
514 rdp_last_dirp = dirp;
515 return;
519 * Returns the location of the next direntry to be read via onefs_readdir().
521 * This value can be passed into onefs_seekdir().
523 * @param[in] handle vfs handle given in most VFS calls
524 * @param[in] dirp system DIR handle to set offset on
526 * @return offset from the start of the directory where the next read
527 * will take place
529 long
530 onefs_telldir(vfs_handle_struct *handle, SMB_STRUCT_DIR *dirp)
532 struct rdp_dir_state *dsp = NULL;
533 bool same_as_last;
534 int ret = -1;
536 /* Fallback to default system routines if readdirplus is disabled */
537 if (!lp_parm_bool(SNUM(handle->conn), PARM_ONEFS_TYPE,
538 PARM_USE_READDIRPLUS, PARM_USE_READDIRPLUS_DEFAULT))
540 return sys_telldir(dirp);
543 /* Retrieve state based off DIR handle */
544 ret = rdp_retrieve_dir_state(dirp, &dsp, &same_as_last);
545 if (ret) {
546 DEBUG(1, ("Could not retrieve dir_state struct for "
547 "SMB_STRUCT_DIR pointer.\n"));
548 return -1;
551 DEBUG(9, ("Tell DIR %p, location: %ld, cache cursor: %zu\n",
552 dsp->dirp, dsp->location, dsp->stat_cursor));
554 return dsp->location;
558 * Set the next direntry to be read via onefs_readdir() to the beginning of the
559 * directory.
561 * @param[in] handle vfs handle given in most VFS calls
562 * @param[in] dirp system DIR handle to set offset on
564 * @return no return value
566 void
567 onefs_rewinddir(vfs_handle_struct *handle, SMB_STRUCT_DIR *dirp)
569 struct rdp_dir_state *dsp = NULL;
570 bool same_as_last;
571 int ret = -1;
573 /* Fallback to default system routines if readdirplus is disabled */
574 if (!lp_parm_bool(SNUM(handle->conn), PARM_ONEFS_TYPE,
575 PARM_USE_READDIRPLUS, PARM_USE_READDIRPLUS_DEFAULT))
577 return sys_rewinddir(dirp);
580 /* Retrieve state based off DIR handle */
581 ret = rdp_retrieve_dir_state(dirp, &dsp, &same_as_last);
582 if (ret) {
583 DEBUG(1, ("Could not retrieve dir_state struct for "
584 "SMB_STRUCT_DIR pointer.\n"));
585 return;
588 /* Reset location and resume key to beginning */
589 ret = rdp_init(dsp);
590 if (ret) {
591 DEBUG(0, ("Error re-initializing rdp cursors: %s\n",
592 strerror(ret)));
593 return;
596 DEBUG(9, ("Rewind DIR: %p, to location: %ld\n", dsp->dirp,
597 dsp->location));
599 return;
603 * Close DIR pointer and remove all state for that directory open.
605 * @param[in] handle vfs handle given in most VFS calls
606 * @param[in] dirp system DIR handle to set offset on
608 * @return -1 on failure, setting errno
611 onefs_closedir(vfs_handle_struct *handle, SMB_STRUCT_DIR *dirp)
613 struct rdp_dir_state *dsp = NULL;
614 bool same_as_last;
615 int ret_val = -1;
616 int ret = -1;
618 /* Fallback to default system routines if readdirplus is disabled */
619 if (!lp_parm_bool(SNUM(handle->conn), PARM_ONEFS_TYPE,
620 PARM_USE_READDIRPLUS, PARM_USE_READDIRPLUS_DEFAULT))
622 return SMB_VFS_NEXT_CLOSEDIR(handle, dirp);
625 /* Retrieve state based off DIR handle */
626 ret = rdp_retrieve_dir_state(dirp, &dsp, &same_as_last);
627 if (ret) {
628 DEBUG(1, ("Could not retrieve dir_state struct for "
629 "SMB_STRUCT_DIR pointer.\n"));
630 errno = ENOENT;
631 return -1;
634 /* Close DIR pointer */
635 ret_val = SMB_VFS_NEXT_CLOSEDIR(handle, dsp->dirp);
637 DEBUG(9, ("Closed handle on DIR %p\n", dsp->dirp));
639 /* Tear down state struct */
640 DLIST_REMOVE(dirstatelist, dsp);
641 SAFE_FREE(dsp);
643 /* Set lastp to NULL, as cache is no longer valid */
644 rdp_last_dirp = NULL;
646 return ret_val;
650 * Initialize cache data at the beginning of every SMB search operation
652 * Since filesystem operations, such as delete files or meta data
653 * updates can occur to files in the directory we're searching
654 * between FIND_FIRST and FIND_NEXT calls we must refresh the cache
655 * from the kernel on every new search SMB.
657 * @param[in] handle vfs handle given in most VFS calls
658 * @param[in] dirp system DIR handle for the current search
660 * @return nothing
662 void
663 onefs_init_search_op(vfs_handle_struct *handle, SMB_STRUCT_DIR *dirp)
665 /* Setting the rdp_last_dirp to NULL will cause the next readdir operation
666 * to refill the cache. */
667 rdp_last_dirp = NULL;
669 return;