1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright 2017 Omnibond Systems, L.L.C.
7 #include "orangefs-kernel.h"
8 #include "orangefs-bufmap.h"
10 struct orangefs_dir_part
{
11 struct orangefs_dir_part
*next
;
17 struct orangefs_dir_part
*part
;
/*
 * Layout of the directory position (ctx->pos).
 *
 * PART_SHIFT is the bit position that splits the position into two
 * fields: the bits at and above it select a part, the bits below it are
 * a byte offset inside that part.
 *
 * PART_SIZE is one past the largest offset the low field can hold.  It
 * is also used as the upper bound when checking the size of a readdir
 * trailer.
 *
 * PART_MASK keeps only the part field of a position; its complement
 * (~PART_MASK) keeps only the offset field.
 */
22 #define PART_SHIFT (24)
23 #define PART_SIZE (1<<24)
24 #define PART_MASK (~(PART_SIZE - 1))
27 * There can be up to 512 directory entries. Each entry is encoded as
29 * 4 bytes: string size (n)
31 * 1 byte: trailing zero
36 * The trailer_buf starts with a struct orangefs_readdir_response_s
37 * which must be skipped to get to the directory data.
39 * The data which is received from the userspace daemon is termed a
40 * part and is stored in a linked list in case more than one part is
41 * needed for a large directory.
43 * The position pointer (ctx->pos) encodes the part and offset on which
44 * to begin reading at. Bits above PART_SHIFT encode the part and bits
45 * below PART_SHIFT encode the offset. Parts are stored in a linked
46 * list which grows as data is received from the server. The overhead
47 * associated with managing the list is presumed to be small compared to
48 * the overhead of communicating with the server.
50 * As data is received from the server, it is placed at the end of the
51 * part list. Data is parsed from the current position as it is needed.
52 * When data is determined to be corrupt, it is either because the
53 * userspace component has sent back corrupt data or because the file
54 * pointer has been moved to an invalid location. Since the two cannot
55 * be differentiated, return EIO.
57 * Part zero is synthesized to contain `.' and `..'. Part one is the
58 * first part of the part list.
61 static int do_readdir(struct orangefs_inode_s
*oi
,
62 struct orangefs_dir
*od
, struct dentry
*dentry
,
63 struct orangefs_kernel_op_s
*op
)
65 struct orangefs_readdir_response_s
*resp
;
69 * Despite the badly named field, readdir does not use shared
70 * memory. However, there are a limited number of readdir
71 * slots, which must be allocated here. This flag simply tells
72 * the op scheduler to return the op here for retry.
74 op
->uses_shared_memory
= 1;
75 op
->upcall
.req
.readdir
.refn
= oi
->refn
;
76 op
->upcall
.req
.readdir
.token
= od
->token
;
77 op
->upcall
.req
.readdir
.max_dirent_count
=
78 ORANGEFS_MAX_DIRENT_COUNT_READDIR
;
81 bufi
= orangefs_readdir_index_get();
87 op
->upcall
.req
.readdir
.buf_index
= bufi
;
89 r
= service_operation(op
, "orangefs_readdir",
90 get_interruptible_flag(dentry
->d_inode
));
92 orangefs_readdir_index_put(bufi
);
94 if (op_state_purged(op
)) {
96 vfree(op
->downcall
.trailer_buf
);
98 } else if (r
== -EIO
) {
99 vfree(op
->downcall
.trailer_buf
);
106 vfree(op
->downcall
.trailer_buf
);
109 } else if (op
->downcall
.status
) {
110 vfree(op
->downcall
.trailer_buf
);
111 od
->error
= op
->downcall
.status
;
112 return op
->downcall
.status
;
116 * The maximum size is size per entry times the 512 entries plus
117 * the header. This is well under the limit.
119 if (op
->downcall
.trailer_size
> PART_SIZE
) {
120 vfree(op
->downcall
.trailer_buf
);
125 resp
= (struct orangefs_readdir_response_s
*)
126 op
->downcall
.trailer_buf
;
127 od
->token
= resp
->token
;
131 static int parse_readdir(struct orangefs_dir
*od
,
132 struct orangefs_kernel_op_s
*op
)
134 struct orangefs_dir_part
*part
, *new;
147 new = (void *)op
->downcall
.trailer_buf
;
149 new->len
= op
->downcall
.trailer_size
-
150 sizeof(struct orangefs_readdir_response_s
);
156 od
->end
= count
<< PART_SHIFT
;
161 static int orangefs_dir_more(struct orangefs_inode_s
*oi
,
162 struct orangefs_dir
*od
, struct dentry
*dentry
)
164 struct orangefs_kernel_op_s
*op
;
167 op
= op_alloc(ORANGEFS_VFS_OP_READDIR
);
172 r
= do_readdir(oi
, od
, dentry
, op
);
177 r
= parse_readdir(od
, op
);
189 static int fill_from_part(struct orangefs_dir_part
*part
,
190 struct dir_context
*ctx
)
192 const int offset
= sizeof(struct orangefs_readdir_response_s
);
193 struct orangefs_khandle
*khandle
;
197 i
= ctx
->pos
& ~PART_MASK
;
199 /* The file offset from userspace is too large. */
204 * If the seek pointer is positioned just before an entry it
205 * should find the next entry.
210 while (i
< part
->len
) {
211 if (part
->len
< i
+ sizeof *len
)
213 len
= (void *)part
+ offset
+ i
;
215 * len is the size of the string itself. padlen is the
216 * total size of the encoded string.
218 padlen
= (sizeof *len
+ *len
+ 1) +
219 (8 - (sizeof *len
+ *len
+ 1)%8)%8;
220 if (part
->len
< i
+ padlen
+ sizeof *khandle
)
222 s
= (void *)part
+ offset
+ i
+ sizeof *len
;
225 khandle
= (void *)part
+ offset
+ i
+ padlen
;
226 if (!dir_emit(ctx
, s
, *len
,
227 orangefs_khandle_to_ino(khandle
),
230 i
+= padlen
+ sizeof *khandle
;
232 BUG_ON(i
> part
->len
);
233 ctx
->pos
= (ctx
->pos
& PART_MASK
) | i
;
241 static int orangefs_dir_fill(struct orangefs_inode_s
*oi
,
242 struct orangefs_dir
*od
, struct dentry
*dentry
,
243 struct dir_context
*ctx
)
245 struct orangefs_dir_part
*part
;
248 count
= ((ctx
->pos
& PART_MASK
) >> PART_SHIFT
) - 1;
251 while (part
->next
&& count
) {
255 /* This means the userspace file offset is invalid. */
261 while (part
&& part
->len
) {
263 r
= fill_from_part(part
, ctx
);
268 /* Userspace buffer is full. */
272 * The part ran out of data. Move to the next
274 ctx
->pos
= (ctx
->pos
& PART_MASK
) +
282 static loff_t
orangefs_dir_llseek(struct file
*file
, loff_t offset
,
285 struct orangefs_dir
*od
= file
->private_data
;
287 * Delete the stored data so userspace sees new directory
290 if (!whence
&& offset
< od
->end
) {
291 struct orangefs_dir_part
*part
= od
->part
;
293 struct orangefs_dir_part
*next
= part
->next
;
297 od
->token
= ORANGEFS_ITERATE_START
;
299 od
->end
= 1 << PART_SHIFT
;
301 return default_llseek(file
, offset
, whence
);
304 static int orangefs_dir_iterate(struct file
*file
,
305 struct dir_context
*ctx
)
307 struct orangefs_inode_s
*oi
;
308 struct orangefs_dir
*od
;
309 struct dentry
*dentry
;
312 dentry
= file
->f_path
.dentry
;
313 oi
= ORANGEFS_I(dentry
->d_inode
);
314 od
= file
->private_data
;
320 if (!dir_emit_dot(file
, ctx
))
325 if (!dir_emit_dotdot(file
, ctx
))
327 ctx
->pos
= 1 << PART_SHIFT
;
331 * The seek position is in the first synthesized part but is not
334 if ((ctx
->pos
& PART_MASK
) == 0)
340 * Must read more if the user has sought past what has been read
341 * so far. Stop a user who has sought past the end.
343 while (od
->token
!= ORANGEFS_ITERATE_END
&&
344 ctx
->pos
> od
->end
) {
345 r
= orangefs_dir_more(oi
, od
, dentry
);
349 if (od
->token
== ORANGEFS_ITERATE_END
&& ctx
->pos
> od
->end
)
352 /* Then try to fill if there's any left in the buffer. */
353 if (ctx
->pos
< od
->end
) {
354 r
= orangefs_dir_fill(oi
, od
, dentry
, ctx
);
359 /* Finally get some more and try to fill. */
360 if (od
->token
!= ORANGEFS_ITERATE_END
) {
361 r
= orangefs_dir_more(oi
, od
, dentry
);
364 r
= orangefs_dir_fill(oi
, od
, dentry
, ctx
);
370 static int orangefs_dir_open(struct inode
*inode
, struct file
*file
)
372 struct orangefs_dir
*od
;
373 file
->private_data
= kmalloc(sizeof(struct orangefs_dir
),
375 if (!file
->private_data
)
377 od
= file
->private_data
;
378 od
->token
= ORANGEFS_ITERATE_START
;
380 od
->end
= 1 << PART_SHIFT
;
385 static int orangefs_dir_release(struct inode
*inode
, struct file
*file
)
387 struct orangefs_dir
*od
= file
->private_data
;
388 struct orangefs_dir_part
*part
= od
->part
;
390 struct orangefs_dir_part
*next
= part
->next
;
398 const struct file_operations orangefs_dir_operations
= {
399 .llseek
= orangefs_dir_llseek
,
400 .read
= generic_read_dir
,
401 .iterate
= orangefs_dir_iterate
,
402 .open
= orangefs_dir_open
,
403 .release
= orangefs_dir_release