Merge branch 'bpf-Add-support-for-sock_ops'
[linux-2.6/btrfs-unstable.git] / fs / orangefs / orangefs-utils.c
blobaab6f1842963d3d0e5891900f46fc9eb570e9a4f
1 /*
2 * (C) 2001 Clemson University and The University of Chicago
4 * See COPYING in top-level directory.
5 */
6 #include "protocol.h"
7 #include "orangefs-kernel.h"
8 #include "orangefs-dev-proto.h"
9 #include "orangefs-bufmap.h"
11 __s32 fsid_of_op(struct orangefs_kernel_op_s *op)
13 __s32 fsid = ORANGEFS_FS_ID_NULL;
15 if (op) {
16 switch (op->upcall.type) {
17 case ORANGEFS_VFS_OP_FILE_IO:
18 fsid = op->upcall.req.io.refn.fs_id;
19 break;
20 case ORANGEFS_VFS_OP_LOOKUP:
21 fsid = op->upcall.req.lookup.parent_refn.fs_id;
22 break;
23 case ORANGEFS_VFS_OP_CREATE:
24 fsid = op->upcall.req.create.parent_refn.fs_id;
25 break;
26 case ORANGEFS_VFS_OP_GETATTR:
27 fsid = op->upcall.req.getattr.refn.fs_id;
28 break;
29 case ORANGEFS_VFS_OP_REMOVE:
30 fsid = op->upcall.req.remove.parent_refn.fs_id;
31 break;
32 case ORANGEFS_VFS_OP_MKDIR:
33 fsid = op->upcall.req.mkdir.parent_refn.fs_id;
34 break;
35 case ORANGEFS_VFS_OP_READDIR:
36 fsid = op->upcall.req.readdir.refn.fs_id;
37 break;
38 case ORANGEFS_VFS_OP_SETATTR:
39 fsid = op->upcall.req.setattr.refn.fs_id;
40 break;
41 case ORANGEFS_VFS_OP_SYMLINK:
42 fsid = op->upcall.req.sym.parent_refn.fs_id;
43 break;
44 case ORANGEFS_VFS_OP_RENAME:
45 fsid = op->upcall.req.rename.old_parent_refn.fs_id;
46 break;
47 case ORANGEFS_VFS_OP_STATFS:
48 fsid = op->upcall.req.statfs.fs_id;
49 break;
50 case ORANGEFS_VFS_OP_TRUNCATE:
51 fsid = op->upcall.req.truncate.refn.fs_id;
52 break;
53 case ORANGEFS_VFS_OP_RA_FLUSH:
54 fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
55 break;
56 case ORANGEFS_VFS_OP_FS_UMOUNT:
57 fsid = op->upcall.req.fs_umount.fs_id;
58 break;
59 case ORANGEFS_VFS_OP_GETXATTR:
60 fsid = op->upcall.req.getxattr.refn.fs_id;
61 break;
62 case ORANGEFS_VFS_OP_SETXATTR:
63 fsid = op->upcall.req.setxattr.refn.fs_id;
64 break;
65 case ORANGEFS_VFS_OP_LISTXATTR:
66 fsid = op->upcall.req.listxattr.refn.fs_id;
67 break;
68 case ORANGEFS_VFS_OP_REMOVEXATTR:
69 fsid = op->upcall.req.removexattr.refn.fs_id;
70 break;
71 case ORANGEFS_VFS_OP_FSYNC:
72 fsid = op->upcall.req.fsync.refn.fs_id;
73 break;
74 default:
75 break;
78 return fsid;
81 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
83 int flags = 0;
84 if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
85 flags |= S_IMMUTABLE;
86 else
87 flags &= ~S_IMMUTABLE;
88 if (attrs->flags & ORANGEFS_APPEND_FL)
89 flags |= S_APPEND;
90 else
91 flags &= ~S_APPEND;
92 if (attrs->flags & ORANGEFS_NOATIME_FL)
93 flags |= S_NOATIME;
94 else
95 flags &= ~S_NOATIME;
96 return flags;
99 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
101 int perm_mode = 0;
103 if (attrs->perms & ORANGEFS_O_EXECUTE)
104 perm_mode |= S_IXOTH;
105 if (attrs->perms & ORANGEFS_O_WRITE)
106 perm_mode |= S_IWOTH;
107 if (attrs->perms & ORANGEFS_O_READ)
108 perm_mode |= S_IROTH;
110 if (attrs->perms & ORANGEFS_G_EXECUTE)
111 perm_mode |= S_IXGRP;
112 if (attrs->perms & ORANGEFS_G_WRITE)
113 perm_mode |= S_IWGRP;
114 if (attrs->perms & ORANGEFS_G_READ)
115 perm_mode |= S_IRGRP;
117 if (attrs->perms & ORANGEFS_U_EXECUTE)
118 perm_mode |= S_IXUSR;
119 if (attrs->perms & ORANGEFS_U_WRITE)
120 perm_mode |= S_IWUSR;
121 if (attrs->perms & ORANGEFS_U_READ)
122 perm_mode |= S_IRUSR;
124 if (attrs->perms & ORANGEFS_G_SGID)
125 perm_mode |= S_ISGID;
126 if (attrs->perms & ORANGEFS_U_SUID)
127 perm_mode |= S_ISUID;
129 return perm_mode;
133 * NOTE: in kernel land, we never use the sys_attr->link_target for
134 * anything, so don't bother copying it into the sys_attr object here.
136 static inline int copy_attributes_from_inode(struct inode *inode,
137 struct ORANGEFS_sys_attr_s *attrs,
138 struct iattr *iattr)
140 umode_t tmp_mode;
142 if (!iattr || !inode || !attrs) {
143 gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
144 "in copy_attributes_from_inode!\n",
145 iattr,
146 inode,
147 attrs);
148 return -EINVAL;
151 * We need to be careful to only copy the attributes out of the
152 * iattr object that we know are valid.
154 attrs->mask = 0;
155 if (iattr->ia_valid & ATTR_UID) {
156 attrs->owner = from_kuid(&init_user_ns, iattr->ia_uid);
157 attrs->mask |= ORANGEFS_ATTR_SYS_UID;
158 gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
160 if (iattr->ia_valid & ATTR_GID) {
161 attrs->group = from_kgid(&init_user_ns, iattr->ia_gid);
162 attrs->mask |= ORANGEFS_ATTR_SYS_GID;
163 gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
166 if (iattr->ia_valid & ATTR_ATIME) {
167 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
168 if (iattr->ia_valid & ATTR_ATIME_SET) {
169 attrs->atime = (time64_t)iattr->ia_atime.tv_sec;
170 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
173 if (iattr->ia_valid & ATTR_MTIME) {
174 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
175 if (iattr->ia_valid & ATTR_MTIME_SET) {
176 attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec;
177 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
180 if (iattr->ia_valid & ATTR_CTIME)
181 attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
184 * ORANGEFS cannot set size with a setattr operation. Probably not likely
185 * to be requested through the VFS, but just in case, don't worry about
186 * ATTR_SIZE
189 if (iattr->ia_valid & ATTR_MODE) {
190 tmp_mode = iattr->ia_mode;
191 if (tmp_mode & (S_ISVTX)) {
192 if (is_root_handle(inode)) {
194 * allow sticky bit to be set on root (since
195 * it shows up that way by default anyhow),
196 * but don't show it to the server
198 tmp_mode -= S_ISVTX;
199 } else {
200 gossip_debug(GOSSIP_UTILS_DEBUG,
201 "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
202 return -EINVAL;
206 if (tmp_mode & (S_ISUID)) {
207 gossip_debug(GOSSIP_UTILS_DEBUG,
208 "Attempting to set setuid bit (not supported); returning EINVAL.\n");
209 return -EINVAL;
212 attrs->perms = ORANGEFS_util_translate_mode(tmp_mode);
213 attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
216 return 0;
219 static int orangefs_inode_type(enum orangefs_ds_type objtype)
221 if (objtype == ORANGEFS_TYPE_METAFILE)
222 return S_IFREG;
223 else if (objtype == ORANGEFS_TYPE_DIRECTORY)
224 return S_IFDIR;
225 else if (objtype == ORANGEFS_TYPE_SYMLINK)
226 return S_IFLNK;
227 else
228 return -1;
231 static int orangefs_inode_is_stale(struct inode *inode, int new,
232 struct ORANGEFS_sys_attr_s *attrs, char *link_target)
234 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
235 int type = orangefs_inode_type(attrs->objtype);
236 if (!new) {
238 * If the inode type or symlink target have changed then this
239 * inode is stale.
241 if (type == -1 || !(inode->i_mode & type)) {
242 orangefs_make_bad_inode(inode);
243 return 1;
245 if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
246 link_target, ORANGEFS_NAME_MAX)) {
247 orangefs_make_bad_inode(inode);
248 return 1;
251 return 0;
254 int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
255 u32 request_mask)
257 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
258 struct orangefs_kernel_op_s *new_op;
259 loff_t inode_size, rounded_up_size;
260 int ret, type;
262 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
263 get_khandle_from_ino(inode));
265 if (!new && !bypass) {
267 * Must have all the attributes in the mask and be within cache
268 * time.
270 if ((request_mask & orangefs_inode->getattr_mask) ==
271 request_mask &&
272 time_before(jiffies, orangefs_inode->getattr_time))
273 return 0;
276 new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
277 if (!new_op)
278 return -ENOMEM;
279 new_op->upcall.req.getattr.refn = orangefs_inode->refn;
281 * Size is the hardest attribute to get. The incremental cost of any
282 * other attribute is essentially zero.
284 if (request_mask & STATX_SIZE || new)
285 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
286 else
287 new_op->upcall.req.getattr.mask =
288 ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;
290 ret = service_operation(new_op, __func__,
291 get_interruptible_flag(inode));
292 if (ret != 0)
293 goto out;
295 type = orangefs_inode_type(new_op->
296 downcall.resp.getattr.attributes.objtype);
297 ret = orangefs_inode_is_stale(inode, new,
298 &new_op->downcall.resp.getattr.attributes,
299 new_op->downcall.resp.getattr.link_target);
300 if (ret) {
301 ret = -ESTALE;
302 goto out;
305 switch (type) {
306 case S_IFREG:
307 inode->i_flags = orangefs_inode_flags(&new_op->
308 downcall.resp.getattr.attributes);
309 if (request_mask & STATX_SIZE || new) {
310 inode_size = (loff_t)new_op->
311 downcall.resp.getattr.attributes.size;
312 rounded_up_size =
313 (inode_size + (4096 - (inode_size % 4096)));
314 inode->i_size = inode_size;
315 orangefs_inode->blksize =
316 new_op->downcall.resp.getattr.attributes.blksize;
317 spin_lock(&inode->i_lock);
318 inode->i_bytes = inode_size;
319 inode->i_blocks =
320 (unsigned long)(rounded_up_size / 512);
321 spin_unlock(&inode->i_lock);
323 break;
324 case S_IFDIR:
325 if (request_mask & STATX_SIZE || new) {
326 inode->i_size = PAGE_SIZE;
327 orangefs_inode->blksize = i_blocksize(inode);
328 spin_lock(&inode->i_lock);
329 inode_set_bytes(inode, inode->i_size);
330 spin_unlock(&inode->i_lock);
332 set_nlink(inode, 1);
333 break;
334 case S_IFLNK:
335 if (new) {
336 inode->i_size = (loff_t)strlen(new_op->
337 downcall.resp.getattr.link_target);
338 orangefs_inode->blksize = i_blocksize(inode);
339 ret = strscpy(orangefs_inode->link_target,
340 new_op->downcall.resp.getattr.link_target,
341 ORANGEFS_NAME_MAX);
342 if (ret == -E2BIG) {
343 ret = -EIO;
344 goto out;
346 inode->i_link = orangefs_inode->link_target;
348 break;
351 inode->i_uid = make_kuid(&init_user_ns, new_op->
352 downcall.resp.getattr.attributes.owner);
353 inode->i_gid = make_kgid(&init_user_ns, new_op->
354 downcall.resp.getattr.attributes.group);
355 inode->i_atime.tv_sec = (time64_t)new_op->
356 downcall.resp.getattr.attributes.atime;
357 inode->i_mtime.tv_sec = (time64_t)new_op->
358 downcall.resp.getattr.attributes.mtime;
359 inode->i_ctime.tv_sec = (time64_t)new_op->
360 downcall.resp.getattr.attributes.ctime;
361 inode->i_atime.tv_nsec = 0;
362 inode->i_mtime.tv_nsec = 0;
363 inode->i_ctime.tv_nsec = 0;
365 /* special case: mark the root inode as sticky */
366 inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
367 orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);
369 orangefs_inode->getattr_time = jiffies +
370 orangefs_getattr_timeout_msecs*HZ/1000;
371 if (request_mask & STATX_SIZE || new)
372 orangefs_inode->getattr_mask = STATX_BASIC_STATS;
373 else
374 orangefs_inode->getattr_mask = STATX_BASIC_STATS & ~STATX_SIZE;
375 ret = 0;
376 out:
377 op_release(new_op);
378 return ret;
381 int orangefs_inode_check_changed(struct inode *inode)
383 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
384 struct orangefs_kernel_op_s *new_op;
385 int ret;
387 gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
388 get_khandle_from_ino(inode));
390 new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
391 if (!new_op)
392 return -ENOMEM;
393 new_op->upcall.req.getattr.refn = orangefs_inode->refn;
394 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
395 ORANGEFS_ATTR_SYS_LNK_TARGET;
397 ret = service_operation(new_op, __func__,
398 get_interruptible_flag(inode));
399 if (ret != 0)
400 goto out;
402 ret = orangefs_inode_is_stale(inode, 0,
403 &new_op->downcall.resp.getattr.attributes,
404 new_op->downcall.resp.getattr.link_target);
405 out:
406 op_release(new_op);
407 return ret;
411 * issues a orangefs setattr request to make sure the new attribute values
412 * take effect if successful. returns 0 on success; -errno otherwise
414 int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr)
416 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
417 struct orangefs_kernel_op_s *new_op;
418 int ret;
420 new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
421 if (!new_op)
422 return -ENOMEM;
424 new_op->upcall.req.setattr.refn = orangefs_inode->refn;
425 ret = copy_attributes_from_inode(inode,
426 &new_op->upcall.req.setattr.attributes,
427 iattr);
428 if (ret >= 0) {
429 ret = service_operation(new_op, __func__,
430 get_interruptible_flag(inode));
432 gossip_debug(GOSSIP_UTILS_DEBUG,
433 "orangefs_inode_setattr: returning %d\n",
434 ret);
437 op_release(new_op);
440 * successful setattr should clear the atime, mtime and
441 * ctime flags.
443 if (ret == 0) {
444 ClearAtimeFlag(orangefs_inode);
445 ClearMtimeFlag(orangefs_inode);
446 ClearCtimeFlag(orangefs_inode);
447 ClearModeFlag(orangefs_inode);
448 orangefs_inode->getattr_time = jiffies - 1;
451 return ret;
454 int orangefs_flush_inode(struct inode *inode)
457 * If it is a dirty inode, this function gets called.
458 * Gather all the information that needs to be setattr'ed
459 * Right now, this will only be used for mode, atime, mtime
460 * and/or ctime.
462 struct iattr wbattr;
463 int ret;
464 int mtime_flag;
465 int ctime_flag;
466 int atime_flag;
467 int mode_flag;
468 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
470 memset(&wbattr, 0, sizeof(wbattr));
473 * check inode flags up front, and clear them if they are set. This
474 * will prevent multiple processes from all trying to flush the same
475 * inode if they call close() simultaneously
477 mtime_flag = MtimeFlag(orangefs_inode);
478 ClearMtimeFlag(orangefs_inode);
479 ctime_flag = CtimeFlag(orangefs_inode);
480 ClearCtimeFlag(orangefs_inode);
481 atime_flag = AtimeFlag(orangefs_inode);
482 ClearAtimeFlag(orangefs_inode);
483 mode_flag = ModeFlag(orangefs_inode);
484 ClearModeFlag(orangefs_inode);
486 /* -- Lazy atime,mtime and ctime update --
487 * Note: all times are dictated by server in the new scheme
488 * and not by the clients
490 * Also mode updates are being handled now..
493 if (mtime_flag)
494 wbattr.ia_valid |= ATTR_MTIME;
495 if (ctime_flag)
496 wbattr.ia_valid |= ATTR_CTIME;
497 if (atime_flag)
498 wbattr.ia_valid |= ATTR_ATIME;
500 if (mode_flag) {
501 wbattr.ia_mode = inode->i_mode;
502 wbattr.ia_valid |= ATTR_MODE;
505 gossip_debug(GOSSIP_UTILS_DEBUG,
506 "*********** orangefs_flush_inode: %pU "
507 "(ia_valid %d)\n",
508 get_khandle_from_ino(inode),
509 wbattr.ia_valid);
510 if (wbattr.ia_valid == 0) {
511 gossip_debug(GOSSIP_UTILS_DEBUG,
512 "orangefs_flush_inode skipping setattr()\n");
513 return 0;
516 gossip_debug(GOSSIP_UTILS_DEBUG,
517 "orangefs_flush_inode (%pU) writing mode %o\n",
518 get_khandle_from_ino(inode),
519 inode->i_mode);
521 ret = orangefs_inode_setattr(inode, &wbattr);
523 return ret;
526 void orangefs_make_bad_inode(struct inode *inode)
528 if (is_root_handle(inode)) {
530 * if this occurs, the pvfs2-client-core was killed but we
531 * can't afford to lose the inode operations and such
532 * associated with the root handle in any case.
534 gossip_debug(GOSSIP_UTILS_DEBUG,
535 "*** NOT making bad root inode %pU\n",
536 get_khandle_from_ino(inode));
537 } else {
538 gossip_debug(GOSSIP_UTILS_DEBUG,
539 "*** making bad inode %pU\n",
540 get_khandle_from_ino(inode));
541 make_bad_inode(inode);
/*
 * The following is a very dirty hack that is now a permanent part of the
 * ORANGEFS protocol. See protocol.h for more error definitions.
 */

/*
 * The order matches include/orangefs-types.h in the OrangeFS source.
 *
 * Indexed by the ORANGEFS error number; each entry is the (positive) errno
 * value it maps to.  The table is read-only, hence const.
 */
static const int PINT_errno_mapping[] = {
	0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
	EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
	EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
	ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
	EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
	EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
	ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
	EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
	ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
	EACCES, ECONNRESET, ERANGE
};
564 int orangefs_normalize_to_errno(__s32 error_code)
566 __u32 i;
568 /* Success */
569 if (error_code == 0) {
570 return 0;
572 * This shouldn't ever happen. If it does it should be fixed on the
573 * server.
575 } else if (error_code > 0) {
576 gossip_err("orangefs: error status receieved.\n");
577 gossip_err("orangefs: assuming error code is inverted.\n");
578 error_code = -error_code;
582 * XXX: This is very bad since error codes from ORANGEFS may not be
583 * suitable for return into userspace.
587 * Convert ORANGEFS error values into errno values suitable for return
588 * from the kernel.
590 if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
591 if (((-error_code) &
592 (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
593 ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
595 * cancellation error codes generally correspond to
596 * a timeout from the client's perspective
598 error_code = -ETIMEDOUT;
599 } else {
600 /* assume a default error code */
601 gossip_err("orangefs: warning: got error code without errno equivalent: %d.\n", error_code);
602 error_code = -EINVAL;
605 /* Convert ORANGEFS encoded errno values into regular errno values. */
606 } else if ((-error_code) & ORANGEFS_ERROR_BIT) {
607 i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
608 if (i < sizeof(PINT_errno_mapping)/sizeof(*PINT_errno_mapping))
609 error_code = -PINT_errno_mapping[i];
610 else
611 error_code = -EINVAL;
614 * Only ORANGEFS protocol error codes should ever come here. Otherwise
615 * there is a bug somewhere.
617 } else {
618 gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n");
620 return error_code;
623 #define NUM_MODES 11
624 __s32 ORANGEFS_util_translate_mode(int mode)
626 int ret = 0;
627 int i = 0;
628 static int modes[NUM_MODES] = {
629 S_IXOTH, S_IWOTH, S_IROTH,
630 S_IXGRP, S_IWGRP, S_IRGRP,
631 S_IXUSR, S_IWUSR, S_IRUSR,
632 S_ISGID, S_ISUID
634 static int orangefs_modes[NUM_MODES] = {
635 ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
636 ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
637 ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
638 ORANGEFS_G_SGID, ORANGEFS_U_SUID
641 for (i = 0; i < NUM_MODES; i++)
642 if (mode & modes[i])
643 ret |= orangefs_modes[i];
645 return ret;
647 #undef NUM_MODES