2 * Unix SMB/CIFS implementation.
4 * Support for OneFS bulk directory enumeration API
6 * Copyright (C) Steven Danneman, 2009
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 3 of the License, or
11 * (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, see <http://www.gnu.org/licenses/>.
23 #include "smbd/smbd.h"
25 #include "onefs_config.h"
27 #include <ifs/ifs_syscalls.h>
28 #include <isi_util/isi_dir.h>
30 /* The OneFS filesystem provides a readdirplus() syscall, equivalent to the
31 * NFSv3 PDU, which retrieves bulk directory listings with stat information
32 * in a single syscall.
34 * This file hides this bulk interface underneath Samba's very POSIX-like
35 * opendir/readdir/telldir VFS interface. This is done to provide a
36 * significant performance improvement when listing the contents of large
37 * directories, which also require file meta information, i.e. a typical
38 * Windows Explorer request.
/* Value rdp_init() stores in a dir_state's resume_cookie. */
41 #define RDP_RESUME_KEY_START 0x1
/* Number of entries in the global cookie array; rdp_init() and
 * rdp_fill_cache() also set a dir_state's stat_count to this value. */
43 #define RDP_BATCH_SIZE 128
/* Size in bytes of the global dirent buffer: room for RDP_BATCH_SIZE
 * full-sized struct dirent records. */
44 #define RDP_DIRENTRIES_SIZE ((size_t)(RDP_BATCH_SIZE * sizeof(struct dirent)))
/* Global buffer of raw dirent records. rdp_init() allocates
 * RDP_DIRENTRIES_SIZE bytes for it the first time it is found NULL; a
 * dir_state's direntries_cursor points into this buffer. */
46 static char *rdp_direntries
= NULL
;
/* Global array of stat structures, read at index stat_cursor by
 * onefs_readdir(). NOTE(review): presumably allocated in rdp_init() with
 * RDP_BATCH_SIZE elements -- the assignment line is not visible here,
 * confirm. */
47 static struct stat
*rdp_stats
= NULL
;
/* Global array of RDP_BATCH_SIZE resume cookies, allocated in rdp_init().
 * onefs_readdir() copies the element at stat_cursor into the dir_state's
 * resume_cookie. */
48 static uint64_t *rdp_cookies
= NULL
;
50 struct rdp_dir_state
{
51 struct rdp_dir_state
*next
, *prev
;
53 char *direntries_cursor
; /* cursor to last returned direntry in cache */
54 size_t stat_count
; /* number of entries stored in the cache */
55 size_t stat_cursor
; /* cursor to last returned stat in the cache */
56 uint64_t resume_cookie
; /* cookie from the last entry returned from the
/* Head of the list of rdp_dir_state records. onefs_rdp_add_dir_state() adds
 * entries with DLIST_ADD, onefs_closedir() removes them with DLIST_REMOVE,
 * and rdp_retrieve_dir_state() walks the list comparing each dirp. */
60 static struct rdp_dir_state
*dirstatelist
= NULL
;
/* DIR handle recorded by the most recent onefs_readdir() call.
 * rdp_retrieve_dir_state() compares the caller's DIR against it to report
 * same_as_last. onefs_seekdir(), onefs_closedir() and onefs_init_search_op()
 * set it to NULL so the next readdir refills the cache.
 * NOTE(review): unlike the other globals here this one is not static --
 * confirm whether another part of the onefs module references it before
 * narrowing its linkage. */
62 SMB_STRUCT_DIR
*rdp_last_dirp
= NULL
;
65 * Given a DIR pointer, return our internal state.
67 * This function also tells us whether the given DIR is the same as we saw
68 * during the last call. Because we use a single globally allocated buffer
69 * for readdirplus entries we must check every call into this API to see if
70 * it's for the same directory listing, or a new one. If it's the same we can
71 * maintain our current cached entries, otherwise we must go to the kernel.
73 * @return 0 on success, 1 on failure
76 rdp_retrieve_dir_state(SMB_STRUCT_DIR
*dirp
, struct rdp_dir_state
**dir_state
,
79 struct rdp_dir_state
*dsp
;
81 /* Is this directory the same as the last call */
82 *same_as_last
= (dirp
== rdp_last_dirp
);
84 for(dsp
= dirstatelist
; dsp
; dsp
= dsp
->next
)
85 if (dsp
->dirp
== dirp
) {
90 /* Couldn't find existing dir_state for the given directory
96 * Initialize the global readdirplus buffers.
98 * These same buffers are used for all calls into readdirplus.
100 * @return 0 on success, errno value on failure
103 rdp_init(struct rdp_dir_state
*dsp
)
105 /* Unfortunately, there is no good way to free these buffers. If we
106 * allocated and freed for every DIR handle performance would be
107 * adversely affected. For now these buffers will be leaked and only
108 * freed when the smbd process dies. */
109 if (!rdp_direntries
) {
110 rdp_direntries
= SMB_MALLOC(RDP_DIRENTRIES_SIZE
);
117 SMB_MALLOC(RDP_BATCH_SIZE
* sizeof(struct stat
));
123 rdp_cookies
= SMB_MALLOC(RDP_BATCH_SIZE
* sizeof(uint64_t));
128 dsp
->direntries_cursor
= rdp_direntries
+ RDP_DIRENTRIES_SIZE
;
129 dsp
->stat_count
= RDP_BATCH_SIZE
;
130 dsp
->stat_cursor
= RDP_BATCH_SIZE
;
131 dsp
->resume_cookie
= RDP_RESUME_KEY_START
;
137 * Call into readdirplus() to refill our global dirent cache.
139 * This function also resets all cursors back to the beginning of the cache.
140 * All stat buffers are retrieved by following symlinks.
142 * @return number of entries retrieved, -1 on error
145 rdp_fill_cache(struct rdp_dir_state
*dsp
)
149 dirfd
= dirfd(dsp
->dirp
);
151 DEBUG(1, ("Could not retrieve fd for DIR\n"));
155 /* Resize the stat_count to grab as many entries as possible */
156 dsp
->stat_count
= RDP_BATCH_SIZE
;
158 DEBUG(9, ("Calling readdirplus() with DIR %p, dirfd: %d, "
159 "resume_cookie %#llx, size_to_read: %zu, "
160 "direntries_size: %zu, stat_count: %u\n",
161 dsp
->dirp
, dirfd
, dsp
->resume_cookie
, RDP_BATCH_SIZE
,
162 RDP_DIRENTRIES_SIZE
, dsp
->stat_count
));
164 nread
= readdirplus(dirfd
,
174 DEBUG(1, ("Error calling readdirplus(): %s\n",
179 DEBUG(9, ("readdirplus() returned %u entries from DIR %p\n",
180 dsp
->stat_count
, dsp
->dirp
));
182 dsp
->direntries_cursor
= rdp_direntries
;
183 dsp
->stat_cursor
= 0;
189 * Create a dir_state to track an open directory that we're enumerating.
191 * This utility function is globally accessible for use by other parts of the
192 * onefs.so module to initialize a dir_state when a directory is opened through
193 * a path other than the VFS layer.
195 * @return 0 on success and errno on failure
197 * @note: Callers of this function MUST cleanup the dir_state through a proper
198 * call to VFS_CLOSEDIR().
201 onefs_rdp_add_dir_state(connection_struct
*conn
, SMB_STRUCT_DIR
*dirp
)
204 struct rdp_dir_state
*dsp
= NULL
;
206 /* No-op if readdirplus is disabled */
207 if (!lp_parm_bool(SNUM(conn
), PARM_ONEFS_TYPE
,
208 PARM_USE_READDIRPLUS
, PARM_USE_READDIRPLUS_DEFAULT
))
213 /* Create a struct dir_state */
214 dsp
= SMB_MALLOC_P(struct rdp_dir_state
);
216 DEBUG(0, ("Error allocating struct rdp_dir_state.\n"));
220 /* Initialize the dir_state structure and add it to the list */
223 DEBUG(0, ("Error initializing readdirplus() buffers: %s\n",
228 /* Set the SMB_STRUCT_DIR in the dsp */
231 DLIST_ADD(dirstatelist
, dsp
);
237 * Open a directory for enumeration.
239 * Create a state struct to track the state of this directory for the life
242 * @param[in] handle vfs handle given in most VFS calls
243 * @param[in] fname filename of the directory to open
244 * @param[in] mask unused
245 * @param[in] attr unused
247 * @return DIR pointer, NULL if directory does not exist, NULL on error
250 onefs_opendir(vfs_handle_struct
*handle
, const char *fname
, const char *mask
,
254 SMB_STRUCT_DIR
*ret_dirp
;
256 /* Fallback to default system routines if readdirplus is disabled */
257 if (!lp_parm_bool(SNUM(handle
->conn
), PARM_ONEFS_TYPE
,
258 PARM_USE_READDIRPLUS
, PARM_USE_READDIRPLUS_DEFAULT
))
260 return SMB_VFS_NEXT_OPENDIR(handle
, fname
, mask
, attr
);
263 /* Open the directory */
264 ret_dirp
= SMB_VFS_NEXT_OPENDIR(handle
, fname
, mask
, attr
);
266 DEBUG(3, ("Unable to open directory: %s\n", fname
));
270 /* Create the dir_state struct and add it to the list */
271 ret
= onefs_rdp_add_dir_state(handle
->conn
, ret_dirp
);
273 DEBUG(0, ("Error adding dir_state to the list\n"));
277 DEBUG(9, ("Opened handle on directory: \"%s\", DIR %p\n",
284 * Retrieve one direntry and optional stat buffer from our readdir cache.
286 * Increment the internal resume cookie, and refresh the cache from the
287 * kernel if necessary.
289 * The cache cursor tracks the last entry which was successfully returned
290 * to a caller of onefs_readdir(). When a new entry is requested, this
291 * function first increments the cursor, then returns that entry.
293 * @param[in] handle vfs handle given in most VFS calls
294 * @param[in] dirp system DIR handle to retrieve direntries from
295 * @param[in,out] sbuf optional stat buffer to fill, this can be NULL
297 * @return dirent structure, NULL if at the end of the directory, NULL on error
300 onefs_readdir(vfs_handle_struct
*handle
, SMB_STRUCT_DIR
*dirp
,
301 SMB_STRUCT_STAT
*sbuf
)
303 struct rdp_dir_state
*dsp
= NULL
;
304 struct dirent
*ret_direntp
;
305 bool same_as_last
, filled_cache
= false;
308 /* Set stat invalid in case we error out */
310 SET_STAT_INVALID(*sbuf
);
312 /* Fallback to default system routines if readdirplus is disabled */
313 if (!lp_parm_bool(SNUM(handle
->conn
), PARM_ONEFS_TYPE
,
314 PARM_USE_READDIRPLUS
, PARM_USE_READDIRPLUS_DEFAULT
))
316 return readdir(dirp
);
319 /* Retrieve state based off DIR handle */
320 ret
= rdp_retrieve_dir_state(dirp
, &dsp
, &same_as_last
);
322 DEBUG(1, ("Could not retrieve dir_state struct for "
323 "SMB_STRUCT_DIR pointer.\n"));
328 /* DIR is the same, current buffer and cursors are valid.
329 * Check if there are any entries left in our current cache. */
331 if (dsp
->stat_cursor
== dsp
->stat_count
- 1) {
332 /* Cache is empty, refill from kernel */
333 ret
= rdp_fill_cache(dsp
);
341 /* DIR is different from last call, reset all buffers and
342 * cursors, and refill the global cache from the new DIR */
343 ret
= rdp_fill_cache(dsp
);
349 DEBUG(8, ("Switched global rdp cache to new DIR entry.\n"));
352 /* If we just filled the cache we treat that action as the cursor
353 * increment as the resume cookie used belonged to the previous
354 * directory entry. If the cache has not changed we first increment
355 * our cursor, then return the next entry */
357 dsp
->direntries_cursor
+=
358 ((struct dirent
*)dsp
->direntries_cursor
)->d_reclen
;
362 /* The resume_cookie stored here purposely differs based on whether we
363 * just filled the cache. The resume cookie stored must always provide
364 * the next direntry, in case the cache is reloaded on every
366 dsp
->resume_cookie
= rdp_cookies
[dsp
->stat_cursor
];
368 /* Return an entry from cache */
369 ret_direntp
= ((struct dirent
*)dsp
->direntries_cursor
);
371 struct stat onefs_sbuf
;
373 onefs_sbuf
= rdp_stats
[dsp
->stat_cursor
];
374 init_stat_ex_from_onefs_stat(sbuf
, &onefs_sbuf
);
376 /* readdirplus() sets st_ino field to 0, if it was
377 * unable to retrieve stat information for that
378 * particular directory entry. */
379 if (sbuf
->st_ex_ino
== 0)
380 SET_STAT_INVALID(*sbuf
);
383 DEBUG(9, ("Read from DIR %p, direntry: \"%s\", resume cookie: %#llx, "
384 "cache cursor: %zu, cache count: %zu\n",
385 dsp
->dirp
, ret_direntp
->d_name
, dsp
->resume_cookie
,
386 dsp
->stat_cursor
, dsp
->stat_count
));
390 /* Set rdp_last_dirp at the end of every VFS call where the cache was
392 rdp_last_dirp
= dirp
;
397 * Set the location of the next direntry to be read via onefs_readdir().
399 * This function should only pass in locations retrieved from onefs_telldir().
401 * @param[in] handle vfs handle given in most VFS calls
402 * @param[in] dirp system DIR handle to set offset on
403 * @param[in] offset into the directory to resume reading from
405 * @return no return value
408 onefs_seekdir(vfs_handle_struct
*handle
, SMB_STRUCT_DIR
*dirp
, long offset
)
410 struct rdp_dir_state
*dsp
= NULL
;
412 uint64_t resume_cookie
= 0;
415 /* Fallback to default system routines if readdirplus is disabled */
416 if (!lp_parm_bool(SNUM(handle
->conn
), PARM_ONEFS_TYPE
,
417 PARM_USE_READDIRPLUS
, PARM_USE_READDIRPLUS_DEFAULT
))
419 return seekdir(dirp
, offset
);
422 /* Validate inputs */
424 DEBUG(1, ("Invalid offset %ld passed.\n", offset
));
428 /* Retrieve state based off DIR handle */
429 ret
= rdp_retrieve_dir_state(dirp
, &dsp
, &same_as_last
);
431 DEBUG(1, ("Could not retrieve dir_state struct for "
432 "SMB_STRUCT_DIR pointer.\n"));
433 /* XXX: we can't return an error, should we ABORT rather than
434 * return without actually seeking? */
438 /* Convert offset to resume_cookie */
439 resume_cookie
= rdp_offset31_to_cookie63(offset
);
441 DEBUG(9, ("Seek DIR %p, offset: %ld, resume_cookie: %#llx\n",
442 dsp
->dirp
, offset
, resume_cookie
));
444 /* TODO: We could check if the resume_cookie is already in the cache
445 * through a linear search. This would allow us to avoid the cost of
446 * flushing the cache. Frequently, the seekdir offset will only be
447 * one entry before the current cache cursor. However, usually
448 * VFS_SEEKDIR() is only called at the end of a TRANS2_FIND read and
449 * we'll flush the cache at the beginning of the next PDU anyway. Some
450 * analysis should be done to see if this enhancement would provide
451 * better performance. */
453 /* Set the resume cookie and indicate that the cache should be reloaded
455 dsp
->resume_cookie
= resume_cookie
;
456 rdp_last_dirp
= NULL
;
462 * Returns the location of the next direntry to be read via onefs_readdir().
464 * This value can be passed into onefs_seekdir().
466 * @param[in] handle vfs handle given in most VFS calls
467 * @param[in] dirp system DIR handle to retrieve the offset from
469 * @return offset into the directory to resume reading from
472 onefs_telldir(vfs_handle_struct
*handle
, SMB_STRUCT_DIR
*dirp
)
474 struct rdp_dir_state
*dsp
= NULL
;
479 /* Fallback to default system routines if readdirplus is disabled */
480 if (!lp_parm_bool(SNUM(handle
->conn
), PARM_ONEFS_TYPE
,
481 PARM_USE_READDIRPLUS
, PARM_USE_READDIRPLUS_DEFAULT
))
483 return telldir(dirp
);
486 /* Retrieve state based off DIR handle */
487 ret
= rdp_retrieve_dir_state(dirp
, &dsp
, &same_as_last
);
489 DEBUG(1, ("Could not retrieve dir_state struct for "
490 "SMB_STRUCT_DIR pointer.\n"));
494 /* Convert resume_cookie to offset */
495 offset
= rdp_cookie63_to_offset31(dsp
->resume_cookie
);
497 DEBUG(1, ("Unable to convert resume_cookie: %#llx to a "
498 "suitable 32-bit offset value. Error: %s\n",
499 dsp
->resume_cookie
, strerror(errno
)));
503 DEBUG(9, ("Seek DIR %p, offset: %ld, resume_cookie: %#llx\n",
504 dsp
->dirp
, offset
, dsp
->resume_cookie
));
510 * Set the next direntry to be read via onefs_readdir() to the beginning of the
513 * @param[in] handle vfs handle given in most VFS calls
514 * @param[in] dirp system DIR handle to set offset on
516 * @return no return value
519 onefs_rewinddir(vfs_handle_struct
*handle
, SMB_STRUCT_DIR
*dirp
)
521 struct rdp_dir_state
*dsp
= NULL
;
525 /* Fallback to default system routines if readdirplus is disabled */
526 if (!lp_parm_bool(SNUM(handle
->conn
), PARM_ONEFS_TYPE
,
527 PARM_USE_READDIRPLUS
, PARM_USE_READDIRPLUS_DEFAULT
))
529 return rewinddir(dirp
);
532 /* Retrieve state based off DIR handle */
533 ret
= rdp_retrieve_dir_state(dirp
, &dsp
, &same_as_last
);
535 DEBUG(1, ("Could not retrieve dir_state struct for "
536 "SMB_STRUCT_DIR pointer.\n"));
540 /* Reset location and resume key to beginning */
543 DEBUG(0, ("Error re-initializing rdp cursors: %s\n",
548 DEBUG(9, ("Rewind DIR: %p, to resume_cookie: %#llx\n", dsp
->dirp
,
549 dsp
->resume_cookie
));
555 * Close DIR pointer and remove all state for that directory open.
557 * @param[in] handle vfs handle given in most VFS calls
558 * @param[in] dirp system DIR handle to close
560 * @return -1 on failure, setting errno
563 onefs_closedir(vfs_handle_struct
*handle
, SMB_STRUCT_DIR
*dirp
)
565 struct rdp_dir_state
*dsp
= NULL
;
570 /* Fallback to default system routines if readdirplus is disabled */
571 if (!lp_parm_bool(SNUM(handle
->conn
), PARM_ONEFS_TYPE
,
572 PARM_USE_READDIRPLUS
, PARM_USE_READDIRPLUS_DEFAULT
))
574 return SMB_VFS_NEXT_CLOSEDIR(handle
, dirp
);
577 /* Retrieve state based off DIR handle */
578 ret
= rdp_retrieve_dir_state(dirp
, &dsp
, &same_as_last
);
580 DEBUG(1, ("Could not retrieve dir_state struct for "
581 "SMB_STRUCT_DIR pointer.\n"));
586 /* Close DIR pointer */
587 ret_val
= SMB_VFS_NEXT_CLOSEDIR(handle
, dsp
->dirp
);
589 DEBUG(9, ("Closed handle on DIR %p\n", dsp
->dirp
));
591 /* Tear down state struct */
592 DLIST_REMOVE(dirstatelist
, dsp
);
595 /* Set lastp to NULL, as cache is no longer valid */
596 rdp_last_dirp
= NULL
;
602 * Initialize cache data at the beginning of every SMB search operation
604 * Since filesystem operations, such as file deletions or metadata
605 * updates, can occur to files in the directory we're searching
606 * between FIND_FIRST and FIND_NEXT calls, we must refresh the cache
607 * from the kernel on every new search SMB.
609 * @param[in] handle vfs handle given in most VFS calls
610 * @param[in] dirp system DIR handle for the current search
615 onefs_init_search_op(vfs_handle_struct
*handle
, SMB_STRUCT_DIR
*dirp
)
617 /* Setting the rdp_last_dirp to NULL will cause the next readdir
618 * operation to refill the cache. */
619 rdp_last_dirp
= NULL
;