4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1983, 2010, Oracle and/or its affiliates. All rights reserved.
25 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
26 /* All Rights Reserved */
29 * University Copyright- Copyright (c) 1982, 1986, 1988
30 * The Regents of the University of California
33 * University Acknowledgment- Portions of this document are derived from
34 * software developed by the University of California, Berkeley, and its
38 #ifndef _SYS_FS_UFS_INODE_H
39 #define _SYS_FS_UFS_INODE_H
41 #include <sys/isa_defs.h>
43 #include <sys/fdbuffer.h>
44 #include <sys/fcntl.h>
46 #include <sys/t_lock.h>
47 #include <sys/thread.h>
50 #include <sys/types32.h>
51 #include <sys/fs/ufs_fs.h>
52 #include <sys/fs/ufs_lockfs.h>
53 #include <sys/fs/ufs_trans.h>
54 #include <sys/kstat.h>
55 #include <sys/fs/ufs_acl.h>
56 #include <sys/fs/ufs_panic.h>
60 #include <sys/vfs_opreg.h>
68 * The I node is the focus of all local file activity in UNIX.
69 * There is a unique inode allocated for each active file,
70 * each current directory, each mounted-on file, each mapping,
71 * and the root. An inode is `named' by its dev/inumber pair.
72 * Data in icommon is read in from permanent inode on volume.
74 * Each inode has 5 locks associated with it:
75 * i_rwlock: Serializes ufs_write and ufs_setattr request
76 * and allows ufs_read requests to proceed in parallel.
77 * Serializes reads/updates to directories.
78 * vfs_dqrwlock: Manages quota sub-system quiescence. See below.
79 * i_contents: Protects almost all of the fields in the inode
80 * except for those listed below. When held
81 * in writer mode also protects those fields
82 * listed under i_tlock.
83 * i_tlock: When i_tlock is held with the i_contents reader
84 * lock the i_atime, i_mtime, i_ctime,
85 * i_delayoff, i_delaylen, i_nextrio, i_writes, i_flag
86 * i_seq, i_writer & i_mapcnt fields are protected.
87 * For more i_flag locking info see below.
88 * ih_lock: Protects inode hash chain buckets
89 * ifree_lock: Protects inode freelist
92 * i_rwlock > i_contents > i_tlock
93 * i_rwlock > vfs_dqrwlock > i_contents(writer) > i_tlock
94 * i_contents > i_tlock
95 * vfs_dqrwlock > i_contents(writer) > i_tlock
96 * ih_lock > i_contents > i_tlock
98 * Making major changes to quota sub-system state, while the file
99 * system is mounted required the addition of another lock. The
100 * primary lock in the quota sub-system is vfs_dqrwlock in the ufsvfs
101 * structure. This lock is used to manage quota sub-system quiescence
102 * for a particular file system. Major changes to quota sub-system
103 * state (disabling quotas, enabling quotas, and setting new quota
104 * limits) all require the file system to be quiescent and grabbing
105 * vfs_dqrwlock as writer accomplishes this. On the other hand,
106 * grabbing vfs_dqrwlock as reader makes the quota sub-system
107 * non-quiescent and lets the quota sub-system know that now is not a
108 * good time to change major quota sub-system state. Typically
109 * vfs_dqrwlock is grabbed for reading before i_contents is grabbed for
110 * writing. However, there are cases where vfs_dqrwlock is grabbed for
111 * reading without a corresponding i_contents write grab because there
112 * is no relevant inode. There are also cases where i_contents is
113 * grabbed for writing when a vfs_dqrwlock read grab is not needed
114 * because the inode changes do not affect quotas.
116 * Unfortunately, performance considerations have required that we be more
117 * intelligent about using i_tlock when updating i_flag. Ideally, we would
118 * have simply separated out several of the bits in i_flag into their own
119 * ints to avoid problems. But, instead, we have implemented the following
122 * o You can update any i_flag field while holding the writer-contents,
123 * or by holding the reader-contents AND holding i_tlock.
124 * You can only call ITIMES_NOLOCK while holding the writer-contents,
125 * or by holding the reader-contents AND holding i_tlock.
127 * o For a directory, holding the reader-rw_lock is sufficient for setting
130 * o Races with IREF are avoided by holding the reader contents lock
131 * and by holding i_tlock in ufs_rmidle, ufs_putapage, and ufs_getpage.
132 * And by holding the writer-contents in ufs_iinactive.
134 * o The callers are no longer required to handle the calls to ITIMES
135 * and ITIMES_NOLOCK. The functions that set the i_flag bits are
136 * responsible for managing those calls. The exceptions are the
139 * SVR4 Extended Fundamental Type (EFT) support:
140 * The inode structure has been enhanced to support
141 * 32-bit user-id, 32-bit group-id, and 32-bit device number.
142 * Standard SVR4 ufs also supports 32-bit mode field. For the reason
143 * of backward compatibility with the previous ufs disk format,
144 * 32-bit mode field is not supported.
146 * The current inode structure is 100% backward compatible with
147 * the previous inode structure if no user-id or group-id exceeds
148 * USHRT_MAX, and no major or minor number of a device number
149 * stored in an inode exceeds 255.
151 * Rules for managing i_seq:
152 * o i_seq is locked under the same rules as i_flag
153 * o The i_ctime or i_mtime MUST never change without increasing
154 * the value of i_seq.
155 * o You may increase the value of i_seq without the timestamps
156 * changing, this may decrease the caller's performance but will
157 * be functionally correct.
158 * o The common case is when IUPD or ICHG is set, increase i_seq
159 * and immediately call ITIMES* or ufs_iupdat to create a new timestamp.
160 * o A less common case is the setting of IUPD or ICHG and while still
161 * holding the correct lock defer the timestamp and i_seq update
162 * until later, but it must still be done before the lock is released.
163 * bmap_write is an example of this, where the caller does the update.
164 * o If multiple changes are being made with the timestamps being
165 * updated only at the end, a single increase of i_seq is allowed.
166 * o If changes are made with IUPD or ICHG being set, but
167 * the controlling lock is being dropped before the timestamp is
168 * updated, there is a risk that another thread will also change
169 * the file, update i_flag, and push just one timestamp update.
170 * There is also the risk that another thread calls ITIMES or
171 * ufs_iupdat without setting IUPD|ICHG and thus not changing i_seq,
172 * this will cause ufs_imark to change the timestamps without changing
173 * i_seq. If the controlling lock is dropped, ISEQ must be set to
174 * force i_seq to be increased on next ufs_imark, but i_seq MUST still
175 * be increased by the original setting thread before its deferred
176 * call to ITIMES to ensure it is increased the correct number of times.
/*
 * Flag values indicating that a uid / gid is 32 bits long.
 *
 * Each expansion is fully parenthesized so it is a single primary
 * expression and behaves the same next to any operator (for example
 * "sizeof UID_LONG").  GID_LONG is cast with the gid type, matching the
 * o_gid_t used for ic_sgid.
 */
#define	UID_LONG	((o_uid_t)65535)
#define	GID_LONG	((o_gid_t)65535)
#define	NDADDR	12		/* direct addresses in inode */
#define	NIADDR	3		/* indirect addresses in inode */
/*
 * Max fast symbolic name length is 56.
 * The whole product is parenthesized so that it stays intact when the
 * macro is used inside a larger expression (e.g. "len / FSL_SIZE").
 */
#define	FSL_SIZE	((NDADDR + NIADDR - 1) * sizeof (daddr32_t))
/*
 * Shorthand for reaching through an inode: i_fs follows i_ufsvfs to the
 * b_un.b_fs member of its vfs_bufp buffer, and i_vfs follows i_vnode to
 * its v_vfsp.  These are textual member aliases, so they only make sense
 * immediately after "->" or "." applied to an inode.
 */
189 #define i_fs i_ufsvfs->vfs_bufp->b_un.b_fs
190 #define i_vfs i_vnode->v_vfsp
193 o_mode_t ic_smode
; /* 0: mode and type of file */
194 short ic_nlink
; /* 2: number of links to file */
195 o_uid_t ic_suid
; /* 4: owner's user id */
196 o_gid_t ic_sgid
; /* 6: owner's group id */
197 u_offset_t ic_lsize
; /* 8: number of bytes in file */
199 struct timeval32 ic_atime
; /* 16: time last accessed */
200 struct timeval32 ic_mtime
; /* 24: time last modified */
201 struct timeval32 ic_ctime
; /* 32: last time inode changed */
203 time32_t ic_atime
; /* 16: time last accessed */
205 time32_t ic_mtime
; /* 24: time last modified */
207 time32_t ic_ctime
; /* 32: last time inode changed */
210 daddr32_t ic_db
[NDADDR
]; /* 40: disk block addresses */
211 daddr32_t ic_ib
[NIADDR
]; /* 88: indirect blocks */
212 int32_t ic_flags
; /* 100: cflags */
213 int32_t ic_blocks
; /* 104: 512 byte blocks actually held */
214 int32_t ic_gen
; /* 108: generation number */
215 int32_t ic_shadow
; /* 112: shadow inode */
216 uid_t ic_uid
; /* 116: long EFT version of uid */
217 gid_t ic_gid
; /* 120: long EFT version of gid */
218 uint32_t ic_oeftflag
; /* 124: extended attr directory ino, 0 = none */
222 * Large directories can be cached. Directory caching can take the following
226 CD_DISABLED_NOMEM
= -2,
233 * Large Files: Note we use the inline functions load_double, store_double
234 * to load and store the long long values of i_size. Therefore the
235 * address of i_size must be eight byte aligned. Kmem_alloc of incore
236 * inode structure makes sure that the structure is 8-byte aligned.
237 * XX64 - reorder this structure?
239 typedef struct inode
{
240 struct inode
*i_chain
[2]; /* must be first */
241 struct inode
*i_freef
; /* free list forward - must be before i_ic */
242 struct inode
*i_freeb
; /* free list back - must be before i_ic */
243 struct icommon i_ic
; /* Must be here */
244 struct vnode
*i_vnode
; /* vnode associated with this inode */
245 struct vnode
*i_devvp
; /* vnode for block I/O */
246 dev_t i_dev
; /* device where inode resides */
247 ino_t i_number
; /* i number, 1-to-1 with device address */
248 off_t i_diroff
; /* offset in dir, where we found last entry */
249 /* just a hint - no locking needed */
250 struct ufsvfs
*i_ufsvfs
; /* incore fs associated with inode */
251 struct dquot
*i_dquot
; /* quota structure controlling this file */
252 krwlock_t i_rwlock
; /* serializes write/setattr requests */
253 krwlock_t i_contents
; /* protects (most of) inode contents */
254 kmutex_t i_tlock
; /* protects time fields, i_flag */
255 offset_t i_nextr
; /* */
256 /* next byte read offset (read-ahead) */
257 /* No lock required */
259 uint_t i_flag
; /* inode flags */
260 uint_t i_seq
; /* modification sequence number */
261 cachedir_t i_cachedir
; /* Cache this directory on next lookup */
262 /* - no locking needed */
263 long i_mapcnt
; /* mappings to file pages */
264 int *i_map
; /* block list for the corresponding file */
265 dev_t i_rdev
; /* INCORE rdev from i_oldrdev by ufs_iget */
266 size_t i_delaylen
; /* delayed writes, units=bytes */
267 offset_t i_delayoff
; /* where we started delaying */
268 offset_t i_nextrio
; /* where to start the next clust */
269 long i_writes
; /* number of outstanding bytes in write q */
270 kcondvar_t i_wrcv
; /* sleep/wakeup for write throttle */
271 offset_t i_doff
; /* dinode byte offset in file system */
272 si_t
*i_ufs_acl
; /* pointer to acl entry */
273 dcanchor_t i_danchor
; /* directory cache anchor */
274 kthread_t
*i_writer
; /* thread which is in window in wrip() */
279 struct icommon di_icom
;
284 #define i_mode i_ic.ic_smode
285 #define i_nlink i_ic.ic_nlink
286 #define i_uid i_ic.ic_uid
287 #define i_gid i_ic.ic_gid
288 #define i_smode i_ic.ic_smode
289 #define i_suid i_ic.ic_suid
290 #define i_sgid i_ic.ic_sgid
292 #define i_size i_ic.ic_lsize
293 #define i_db i_ic.ic_db
294 #define i_ib i_ic.ic_ib
296 #define i_atime i_ic.ic_atime
297 #define i_mtime i_ic.ic_mtime
298 #define i_ctime i_ic.ic_ctime
300 #define i_shadow i_ic.ic_shadow
301 #define i_oeftflag i_ic.ic_oeftflag
302 #define i_blocks i_ic.ic_blocks
303 #define i_cflags i_ic.ic_flags
304 #ifdef _LITTLE_ENDIAN
306 * Originally done on x86, but carried on to all other little-endian
307 * architectures, which provides for file system compatibility.
309 #define i_ordev i_ic.ic_db[1] /* USL SVR4 compatibility */
311 #define i_ordev i_ic.ic_db[0] /* was i_oldrdev */
313 #define i_gen i_ic.ic_gen
314 #define i_forw i_chain[0]
315 #define i_back i_chain[1]
317 /* EFT transition aids - obsolete */
318 #define oEFT_MAGIC 0x90909090
319 #define di_oeftflag di_ic.ic_oeftflag
321 #define di_ic di_un.di_icom
322 #define di_mode di_ic.ic_smode
323 #define di_nlink di_ic.ic_nlink
324 #define di_uid di_ic.ic_uid
325 #define di_gid di_ic.ic_gid
326 #define di_smode di_ic.ic_smode
327 #define di_suid di_ic.ic_suid
328 #define di_sgid di_ic.ic_sgid
330 #define di_size di_ic.ic_lsize
331 #define di_db di_ic.ic_db
332 #define di_ib di_ic.ic_ib
334 #define di_atime di_ic.ic_atime
335 #define di_mtime di_ic.ic_mtime
336 #define di_ctime di_ic.ic_ctime
337 #define di_cflags di_ic.ic_flags
339 #ifdef _LITTLE_ENDIAN
340 #define di_ordev di_ic.ic_db[1]
342 #define di_ordev di_ic.ic_db[0]
344 #define di_shadow di_ic.ic_shadow
345 #define di_blocks di_ic.ic_blocks
346 #define di_gen di_ic.ic_gen
/*
 * Bits kept in the inode's i_flag word.  Values are written at a uniform
 * width; each bit is distinct.
 */
#define	IUPD		0x00000001	/* file contents were modified */
#define	IACC		0x00000002	/* access time needs updating */
#define	IMOD		0x00000004	/* inode was modified */
#define	ICHG		0x00000008	/* inode was changed */
#define	INOACC		0x00000010	/* getpage: skip access time update */
#define	IMODTIME	0x00000020	/* mod time is already set */
#define	IREF		0x00000040	/* inode is being referenced */
#define	ISYNC		0x00000080	/* do all allocation synchronously */
#define	IFASTSYMLNK	0x00000100	/* fast symbolic link */
#define	IMODACC		0x00000200	/* only access time changed; */
					/* filesystem won't become active */
#define	IATTCHG		0x00000400	/* only size/blocks have changed */
#define	IBDWRITE	0x00000800	/* inode has been scheduled for an */
					/* asynchronous write operation */
#define	ISTALE		0x00001000	/* inode couldn't be read from disk */
#define	IDEL		0x00002000	/* inode is being deleted */
#define	IDIRECTIO	0x00004000	/* attempt directio */
#define	ISEQ		0x00008000	/* deferred i_seq increase */
#define	IJUNKIQ		0x00010000	/* on junk idle queue */
#define	IQUIET		0x00020000	/* no file system full messages */
/* Bits tested against the inode's i_cflags (ic_flags) word. */
#define	IXATTR		0x00000001	/* extended attribute */
#define	IFALLOCATE	0x00000002	/* fallocate'd file */
#define	ICOMPRESS	0x00000004	/* compressed for dcfs - see */
					/* ufs_ioctl() _FIO_COMPRESSED */
/* File type field of the mode word and the individual type codes. */
#define	IFMT		0170000		/* mask: type of file */
#define	IFIFO		0010000		/* named pipe (fifo) */
#define	IFCHR		0020000		/* character special */
#define	IFDIR		0040000		/* directory */
#define	IFBLK		0060000		/* block special */
#define	IFREG		0100000		/* regular */
#define	IFLNK		0120000		/* symbolic link */
#define	IFSHAD		0130000		/* shadow inode */
#define	IFSOCK		0140000		/* socket */
#define	IFATTRDIR	0160000		/* attribute directory */

/* Remaining mode bits. */
#define	ISUID		0004000		/* set user id on execution */
#define	ISGID		0002000		/* set group id on execution */
#define	ISVTX		0001000		/* save swapped text even after use */
#define	IREAD		0000400		/* read, write, execute permissions */
/*
 * Selects how ufs_syncip() writes the inode information.
 * Listed in numeric order.
 */
#define	I_ASYNC		0	/* don't wait for the inode written */
#define	I_SYNC		1	/* wait for the inode written to disk */
#define	I_DSYNC		2	/* wait for the inode written to disk */
				/* only if IATTCHG is set */
/*
 * Flag bits accepted by ufs_itrunc(), indirtrunc(), and free().
 * All values are written at the same width.
 */
#define	I_FREE		0x00000001	/* inode is being freed */
#define	I_DIR		0x00000002	/* inode is a directory */
#define	I_IBLK		0x00000004	/* indirect block */
#define	I_CHEAP		0x00000008	/* cheap free */
#define	I_SHAD		0x00000010	/* inode is a shadow inode */
#define	I_QUOTA		0x00000020	/* quota file */
#define	I_NOCANCEL	0x00000040	/* don't cancel these fragments */
#define	I_ACCT		0x00000080	/* update ufsvfs' unreclaimed_blocks */
412 * If ufs_dircheckforname() fails to find an entry with the given name,
413 * this "slot" structure holds state for ufs_direnter_*() as to where
414 * there is space to put an entry with that name.
415 * If ufs_dircheckforname() finds an entry with the given name, this structure
416 * holds state for ufs_dirrename() and ufs_dirremove() as to where the
417 * entry is. "status" indicates what ufs_dircheckforname() found:
418 * NONE name not found, large enough free slot not found,
419 * FOUND name not found, large enough free slot found
421 * If ufs_dircheckforname() fails due to an error, this structure is not
424 * After ufs_dircheckforname() succeeds the values are:
425 * status offset size fbp, ep
426 * ------ ------ ---- -------
427 * NONE end of dir needed not valid
428 * FOUND start of entry of ent both valid if fbp != NULL
429 * EXIST start of entry of prev ent valid
431 * "endoff" is set to 0 if an entry with the given name is found, or if no
432 * free slot could be found or made; this means that the directory should not
433 * be truncated. If the entry was found, the search terminates so
434 * ufs_dircheckforname() didn't find out where the last valid entry in the
435 * directory was, so it doesn't know where to cut the directory off; if no free
436 * slot could be found or made, the directory has to be extended to make room
437 * for the new entry, so there's nothing to cut off.
438 * Otherwise, "endoff" is set to the larger of the offset of the last
439 * non-empty entry in the directory and the offset at which the new entry will
440 * be placed. This is used by ufs_diraddentry(); if a new
441 * entry is to be added to the directory, any complete directory blocks at the
442 * end of the directory that contain no non-empty entries are lopped off the
443 * end, thus shrinking the directory dynamically.
/*
 * What ufs_dircheckforname() found; recorded as the "status" of a
 * directory slot.
 */
typedef enum {
	NONE,		/* name not found, large enough free slot not found */
	FOUND,		/* name not found, large enough free slot found */
	EXIST		/* an entry with the given name was found */
} slotstat_t;
447 struct direct
*ep
; /* pointer to slot */
448 struct fbuf
*fbp
; /* dir buf where slot is */
449 off_t offset
; /* offset of area with free space */
450 off_t endoff
; /* last useful location found in search */
451 slotstat_t status
; /* status of slot */
452 int size
; /* size of area at slotoffset */
453 int cached
; /* cached directory */
457 * Statistics on inodes
458 * Not protected by locks
461 kstat_named_t in_size
; /* current cache size */
462 kstat_named_t in_maxsize
; /* maximum cache size */
463 kstat_named_t in_hits
; /* cache hits */
464 kstat_named_t in_misses
; /* cache misses */
465 kstat_named_t in_malloc
; /* kmem_alloce'd */
466 kstat_named_t in_mfree
; /* kmem_free'd */
467 kstat_named_t in_maxreached
; /* Largest size reached by cache */
468 kstat_named_t in_frfront
; /* # put at front of freelist */
469 kstat_named_t in_frback
; /* # put at back of freelist */
470 kstat_named_t in_qfree
; /* q's to delete thread */
471 kstat_named_t in_scan
; /* # inodes scanned */
472 kstat_named_t in_tidles
; /* # inodes idled by idle thread */
473 kstat_named_t in_lidles
; /* # inodes idled by ufs_lookup */
474 kstat_named_t in_vidles
; /* # inodes idled by ufs_vget */
475 kstat_named_t in_kcalloc
; /* # inodes kmem_cache_alloced */
476 kstat_named_t in_kcfree
; /* # inodes kmem_cache_freed */
477 kstat_named_t in_poc
; /* # push-on-close's */
483 * Extended attributes
486 #define XATTR_DIR_NAME "/@/"
487 extern int ufs_ninode
; /* high-water mark for inode cache */
489 extern struct vnodeops
*ufs_vnodeops
; /* vnode operations for ufs */
490 extern const struct fs_operation_def ufs_vnodeops_template
[];
493 * Convert between inode pointers and vnode pointers
/*
 * Pointer conversions: vnode -> inode (through v_data), inode -> vnode
 * (through i_vnode), and inode -> struct fs (through i_fs).  The member
 * access is grouped explicitly before the cast is applied.
 */
#define	VTOI(VP)	((struct inode *)((VP)->v_data))
#define	ITOV(IP)	((struct vnode *)((IP)->i_vnode))
#define	ITOF(IP)	((struct fs *)((IP)->i_fs))
504 * Convert between vnode types and inode formats
506 extern enum vtype iftovt_tab
[];
510 /* Look at sys/mode.h and os/vnode.c */
512 extern int vttoif_tab
[];
517 * Mark an inode with the current (unique) timestamp.
518 * (Note that UFS's concept of time only keeps 32 bits of seconds
519 * in the on-disk format).
/*
 * NOTE(review): iuniqtime is declared here without "extern", so every
 * translation unit that includes this header gets its own tentative
 * definition; toolchains that do not merge common symbols (-fno-common)
 * will report duplicate definitions at link time.  Consider declaring it
 * extern here and defining it in exactly one .c file.
 * ufs_iuniqtime_lock presumably serializes updates to iuniqtime --
 * confirm against the users of these symbols.
 */
521 struct timeval32 iuniqtime
;
522 extern kmutex_t ufs_iuniqtime_lock
;
524 #define ITIMES_NOLOCK(ip) ufs_itimes_nolock(ip)
526 #define ITIMES(ip) { \
527 mutex_enter(&(ip)->i_tlock); \
529 mutex_exit(&(ip)->i_tlock); \
533 * The following interfaces are used to do atomic loads and stores
534 * of an inode's i_size, which is a long long data type.
536 * For LP64, we just do a load or a store - atomicity and alignment
537 * are 8-byte guaranteed. For x86 there are no such instructions,
538 * so we grab i_contents as reader to get the size; we already hold
539 * it as writer when we're setting the size.
/*
 * Plain load/store form of the i_size accessors.
 *
 *   UFS_GET_ISIZE(resultp, ip)  copies (ip)->i_size into *(resultp)
 *   UFS_SET_ISIZE(value, ip)    stores (value) into (ip)->i_size
 *
 * Each macro expands to one fully parenthesized assignment expression, so
 * it works both as a statement and inside a larger expression without
 * the surrounding operators regrouping the assignment.
 */
#define	UFS_GET_ISIZE(resultp, ip)	(*(resultp) = (ip)->i_size)
#define	UFS_SET_ISIZE(value, ip)	((ip)->i_size = (value))
549 #define UFS_GET_ISIZE(resultp, ip) \
551 rw_enter(&(ip)->i_contents, RW_READER); \
552 *(resultp) = (ip)->i_size; \
553 rw_exit(&(ip)->i_contents); \
555 #define UFS_SET_ISIZE(value, ip) \
557 ASSERT(RW_WRITE_HELD(&(ip)->i_contents)); \
558 (ip)->i_size = (value); \
564 * Allocate the specified block in the inode
565 * and make sure any in-core pages are initialized.
/*
 * Allocate the specified block in the inode via bmap_write() using
 * BI_NORMAL initialization and a NULL block-address argument.
 * Every macro argument, including cr, is parenthesized so that an
 * argument expression cannot be regrouped by the expansion.
 */
#define	BMAPALLOC(ip, off, size, cr) \
	bmap_write((ip), (u_offset_t)(off), (size), BI_NORMAL, NULL, (cr))
#define	ESAME	(-1)		/* trying to rename linked files (special) */

/*
 * Value used when no block is allocated.  The cast expression is wrapped
 * in parentheses so the macro is a single operand (e.g. "sizeof UFS_HOLE"
 * yields sizeof (daddr32_t) rather than sizeof (daddr32_t) - 1).
 */
#define	UFS_HOLE	((daddr32_t)-1)
/*
 * Operation selector taken by the directory-entry creation routines
 * (ufs_direnter_cm(), ufs_direnter_lr(), ufs_dirmakeinode()).
 */
enum de_op {
	DE_CREATE,
	DE_MKDIR,
	DE_LINK,
	DE_RENAME,
	DE_SYMLINK,
	DE_ATTRDIR
};
/* Operation selector taken by ufs_dirremove(). */
enum dr_op {
	DR_REMOVE,
	DR_RMDIR,
	DR_RENAME
};
585 * block initialization type for bmap_write
587 * BI_NORMAL - allocate and zero fill pages in memory
588 * BI_ALLOC_ONLY - only allocate the block, do not zero out pages in mem
589 * BI_FALLOCATE - allocate only, do not zero out pages, and store as negative
590 * block number in inode block list
/* Block initialization type passed to bmap_write(). */
enum bi_type {
	BI_NORMAL,	/* allocate and zero fill pages in memory */
	BI_ALLOC_ONLY,	/* only allocate the block, don't zero pages */
	BI_FALLOCATE	/* allocate only, don't zero pages, and store */
			/* as negative block number in inode block list */
};
595 * This overlays the fid structure (see vfs.h)
597 * LP64 note: we use int32_t instead of ino_t since UFS does not use
598 * inode numbers larger than 32-bits and ufid's are passed to NFS
599 * which expects them to not grow in size beyond 10 bytes (12 including
610 * each ufs thread (see ufs_thread.c) is managed by this struct
614 void *_uq_generic
; /* first entry on q */
616 ufs_failure_t
*_uq_uf
;
618 int uq_ne
; /* # of entries/failures found */
619 int uq_lowat
; /* thread runs when ne == lowat */
620 int uq_hiwat
; /* synchronous idle if ne >= hiwat */
621 ushort_t uq_flags
; /* flags (see below) */
622 kcondvar_t uq_cv
; /* for sleep/wakeup */
623 kthread_id_t uq_threadp
; /* thread managing this q */
624 kmutex_t uq_mutex
; /* protects this struct */
627 #define uq_head _uq_head._uq_generic
628 #define uq_ihead _uq_head._uq_i
629 #define uq_ufhead _uq_head._uq_uf
/* Values for the uq_flags member of a ufs queue. */
#define	UQ_EXIT		0x0001	/* q server exits at its convenience */
#define	UQ_WAIT		0x0002	/* thread is waiting on q server */
#define	UQ_SUSPEND	0x0004	/* request for suspension */
#define	UQ_SUSPENDED	0x0008	/* thread has suspended itself */
640 * When logging is enabled, statvfs must account for blocks and files that
641 * may be on the delete queue. Protected by ufsvfsp->vfs_delete.uq_mutex
643 struct ufs_delq_info
{
644 u_offset_t delq_unreclaimed_blocks
;
645 ulong_t delq_unreclaimed_files
;
651 * The queues are sized dynamically in proportion to ufs_ninode
652 * which, unless overridden, scales with the amount of memory.
653 * The idle queue is halved whenever it hits the low water mark
654 * (1/4 of ufs_ninode), but can burst to sizes much larger. The number
655 * of hash queues is currently maintained to give on average IQHASHQLEN
656 * entries when the idle queue is at the low water mark.
657 * Note, we do not need to search along the hash queues, but use them
658 * in order to batch together geographically local inodes to allow
659 * their updates (via the log or buffer cache) to require fewer disk seeks.
660 * This gives an incredible performance boost for logging and a boost for
661 * non logging file systems.
664 inode_t
*i_chain
[2]; /* must match inode_t, but unused */
665 inode_t
*i_freef
; /* must match inode_t, idle list forward */
666 inode_t
*i_freeb
; /* must match inode_t, idle list back */
669 extern struct ufs_q ufs_idle_q
; /* used by global ufs idle thread */
670 extern iqhead_t
*ufs_junk_iq
; /* junk idle queues */
671 extern iqhead_t
*ufs_useful_iq
; /* useful idle queues */
672 extern int ufs_njunk_iq
; /* number of entries in junk iq */
673 extern int ufs_nuseful_iq
; /* number of entries in useful iq */
674 extern int ufs_niqhash
; /* number of iq hash qs - power of 2 */
675 extern int ufs_iqhashmask
; /* iq hash mask = ufs_niqhash - 1 */
/*
 * Idle queue hashing.
 *
 * IQHASHQLEN  average number of entries per hash queue when the idle
 *             queue is at its low water mark
 * INOCGSHIFT  shift that groups inode numbers in runs of 128
 * IQHASH(ip)  idle hash queue index for inode ip
 * IQNEXT(i)   index of the idle queue following i, wrapping via the mask
 *
 * IQNEXT is fully parenthesized: without the outer parentheses an
 * expression such as "IQNEXT(i) == x" would group as
 * "((i) + 1) & (ufs_iqhashmask == x)".
 */
#define	IQHASHQLEN	32
#define	INOCGSHIFT	7	/* 128 inodes per cylinder group */
#define	IQHASH(ip)	(((ip)->i_number >> INOCGSHIFT) & ufs_iqhashmask)
#define	IQNEXT(i)	(((i) + 1) & ufs_iqhashmask)	/* next idle queue */
682 extern struct ufs_q ufs_hlock
; /* used by global ufs hlock thread */
687 #define UFS_LARGEFILES ((ushort_t)0x1) /* set if mount allows largefiles */
692 #define UFS_DFRATIME 0x1 /* deferred access time */
695 * UFS VFS private data.
697 * UFS file system instances may be linked on several lists.
699 * - The vfs_next field chains together every extant ufs instance; this
700 * list is rooted at ufs_instances and should be used in preference to
701 * the overall vfs list (which is properly the province of the generic
702 * file system code, not of file system implementations). This same list
703 * link is used during forcible unmounts to chain together instances that
704 * can't yet be completely dismantled,
706 * - The vfs_wnext field is used within ufs_update to form a work list of
707 * UFS instances to be synced out.
709 typedef struct ufsvfs
{
710 struct vfs
*vfs_vfs
; /* back link */
711 struct ufsvfs
*vfs_next
; /* instance list link */
712 struct ufsvfs
*vfs_wnext
; /* work list link */
713 struct vnode
*vfs_root
; /* root vnode */
714 struct buf
*vfs_bufp
; /* buffer containing superblock */
715 struct vnode
*vfs_devvp
; /* block device vnode */
716 ushort_t vfs_lfflags
; /* Large files (set by mount) */
717 ushort_t vfs_qflags
; /* QUOTA: filesystem flags */
718 struct inode
*vfs_qinod
; /* QUOTA: pointer to quota file */
719 uint_t vfs_btimelimit
; /* QUOTA: block time limit */
720 uint_t vfs_ftimelimit
; /* QUOTA: file time limit */
721 krwlock_t vfs_dqrwlock
; /* QUOTA: protects quota fields */
723 * some fs local threads
725 struct ufs_q vfs_delete
; /* delayed inode delete */
726 struct ufs_q vfs_reclaim
; /* reclaim open, deleted files */
729 * This is copied from the super block at mount time.
731 int vfs_nrpos
; /* # rotational positions */
733 * This lock protects cg's and super block pointed at by
734 * vfs_bufp->b_fs. Locks contents of fs and cg's and contents
738 struct ulockfs vfs_ulockfs
; /* ufs lockfs support */
739 uint_t vfs_dio
; /* delayed io (_FIODIO) */
740 uint_t vfs_nointr
; /* disallow lockfs interrupts */
741 uint_t vfs_nosetsec
; /* disallow ufs_setsecattr */
742 uint_t vfs_syncdir
; /* synchronous local directory ops */
743 uint_t vfs_dontblock
; /* don't block on forced umount */
746 * trans (logging ufs) stuff
748 uint_t vfs_domatamap
; /* set if matamap enabled */
749 ulong_t vfs_maxacl
; /* transaction stuff - max acl size */
750 ulong_t vfs_dirsize
; /* logspace for directory creation */
751 ulong_t vfs_avgbfree
; /* average free blks in cg (blkpref) */
753 * Some useful constants
755 int vfs_nindirshift
; /* calc. from fs_nindir */
756 int vfs_nindiroffset
; /* calc. from fs_ninidr */
757 int vfs_ioclustsz
; /* bytes in read/write cluster */
758 int vfs_iotransz
; /* max device i/o transfer size */
760 vfs_ufsfx_t vfs_fsfx
; /* lock/fix-on-panic support */
762 * More useful constants
764 int vfs_minfrags
; /* calc. from fs_minfree */
766 * Force DirectIO on all files
768 uint_t vfs_forcedirectio
;
770 * Deferred inode time related fields
772 clock_t vfs_iotstamp
; /* last I/O timestamp */
773 uint_t vfs_dfritime
; /* deferred inode time flags */
775 * Some more useful info
777 dev_t vfs_dev
; /* device mounted from */
778 struct ml_unit
*vfs_log
; /* pointer to embedded log struct */
779 uint_t vfs_noatime
; /* disable inode atime updates */
783 void *vfs_snapshot
; /* snapshot handle */
785 * Controls logging "file system full" messages to messages file
787 clock_t vfs_lastwhinetime
;
789 int vfs_nolog_si
; /* not logging summary info */
790 int vfs_validfs
; /* indicates mounted fs */
793 * Additional information about vfs_delete above
795 struct ufs_delq_info vfs_delete_info
; /* what's on the delete queue */
798 #define vfs_fs vfs_bufp->b_un.b_fs
801 * values for vfs_validfs
803 #define UT_UNMOUNTED 0
805 #define UT_HLOCKING 2
/*
 * Map an inode number to an inode hash bucket index.
 * inohsz is guaranteed to be a power of 2, so masking with (inohsz - 1)
 * keeps the low bits.  The argument is parenthesized inside the cast so
 * that the whole argument expression is converted to int, not just its
 * first operand.
 */
#define	INOHASH(ino)	(((int)(ino)) & (inohsz - 1))
/*
 * True when block number bn is negative, is a multiple of the file
 * system's fs_frag, the inode's cflags have IFALLOCATE set, and bn is not
 * UFS_HOLE.  Every use of ip is parenthesized so that an argument
 * expression such as "ip + 1" or "&arr[i]" is dereferenced as a whole.
 */
#define	ISFALLOCBLK(ip, bn) \
	(((bn) < 0) && ((bn) % (ip)->i_fs->fs_frag == 0) && \
	(((ip)->i_cflags & IFALLOCATE) && ((bn) != UFS_HOLE)))
815 union ihead
*ih_head
[2];
816 struct inode
*ih_chain
[2];
819 extern union ihead
*ihead
;
820 extern kmutex_t
*ih_lock
;
824 extern clock_t ufs_iowait
;
829 * ufs function prototypes
831 #if defined(_KERNEL) && !defined(_BOOT)
833 extern void ufs_iinit(void);
834 extern int ufs_iget(struct vfs
*, ino_t
, struct inode
**, cred_t
*);
835 extern int ufs_iget_alloced(struct vfs
*, ino_t
, struct inode
**,
837 extern void ufs_reset_vnode(vnode_t
*);
838 extern void ufs_iinactive(struct inode
*);
839 extern void ufs_iupdat(struct inode
*, int);
840 extern int ufs_rmidle(struct inode
*);
841 extern int ufs_itrunc(struct inode
*, u_offset_t
, int, cred_t
*);
842 extern int ufs_iaccess(struct inode
*, int, cred_t
*, int);
843 extern int rdip(struct inode
*, struct uio
*, int, struct cred
*);
844 extern int wrip(struct inode
*, struct uio
*, int, struct cred
*);
846 extern void ufs_imark(struct inode
*);
847 extern void ufs_itimes_nolock(struct inode
*);
849 extern int ufs_diraccess(struct inode
*, int, struct cred
*);
850 extern int ufs_dirlook(struct inode
*, char *, struct inode
**,
852 extern int ufs_direnter_cm(struct inode
*, char *, enum de_op
,
853 struct vattr
*, struct inode
**, cred_t
*, int);
854 extern int ufs_direnter_lr(struct inode
*, char *, enum de_op
,
855 struct inode
*, struct inode
*, cred_t
*);
856 extern int ufs_dircheckpath(ino_t
, struct inode
*, struct inode
*,
858 extern int ufs_dirmakeinode(struct inode
*, struct inode
**,
859 struct vattr
*, enum de_op
, cred_t
*);
860 extern int ufs_dirremove(struct inode
*, char *, struct inode
*,
861 vnode_t
*, enum dr_op
, cred_t
*);
862 extern int ufs_dircheckforname(struct inode
*, char *, int,
863 struct ufs_slot
*, struct inode
**, struct cred
*, int);
864 extern int ufs_xattrdirempty(struct inode
*, ino_t
, cred_t
*);
865 extern int blkatoff(struct inode
*, off_t
, char **, struct fbuf
**);
867 extern void sbupdate(struct vfs
*);
869 extern int ufs_ialloc(struct inode
*, ino_t
, mode_t
, struct inode
**,
871 extern void ufs_ifree(struct inode
*, ino_t
, mode_t
);
872 extern void free(struct inode
*, daddr_t
, off_t
, int);
873 extern int alloc(struct inode
*, daddr_t
, int, daddr_t
*, cred_t
*);
874 extern int realloccg(struct inode
*, daddr_t
, daddr_t
, int, int,
875 daddr_t
*, cred_t
*);
876 extern int ufs_allocsp(struct vnode
*, struct flock64
*, cred_t
*);
877 extern int ufs_freesp(struct vnode
*, struct flock64
*, int, cred_t
*);
878 extern ino_t
dirpref(inode_t
*);
879 extern daddr_t
blkpref(struct inode
*, daddr_t
, int, daddr32_t
*);
880 extern daddr_t
contigpref(ufsvfs_t
*, size_t, size_t);
882 extern int ufs_rdwri(enum uio_rw
, int, struct inode
*, caddr_t
, ssize_t
,
883 offset_t
, enum uio_seg
, int *, cred_t
*);
885 extern int bmap_read(struct inode
*, u_offset_t
, daddr_t
*, int *);
886 extern int bmap_write(struct inode
*, u_offset_t
, int, enum bi_type
,
887 daddr_t
*, struct cred
*);
888 extern int bmap_has_holes(struct inode
*);
889 extern int bmap_find(struct inode
*, boolean_t
, u_offset_t
*);
890 extern int bmap_set_bn(struct vnode
*, u_offset_t
, daddr32_t
);
/*
 * Paired add/remove routines operating on a struct ufsvfs.
 * NOTE(review): presumably maintain a list of mounted UFS instances --
 * confirm against the definitions.
 */
extern void ufs_vfs_add(struct ufsvfs *);
extern void ufs_vfs_remove(struct ufsvfs *);
895 extern void ufs_sbwrite(struct ufsvfs
*);
896 extern void ufs_update(int);
897 extern int ufs_getsummaryinfo(dev_t
, struct ufsvfs
*, struct fs
*);
898 extern int ufs_putsummaryinfo(dev_t
, struct ufsvfs
*, struct fs
*);
899 extern int ufs_syncip(struct inode
*, int, int, top_t
);
900 extern int ufs_sync_indir(struct inode
*);
901 extern int ufs_indirblk_sync(struct inode
*, offset_t
);
902 extern int ufs_badblock(struct inode
*, daddr_t
);
903 extern int ufs_indir_badblock(struct inode
*, daddr32_t
*);
904 extern void ufs_notclean(struct ufsvfs
*);
905 extern void ufs_checkclean(struct vfs
*);
906 extern int isblock(struct fs
*, uchar_t
*, daddr_t
);
907 extern void setblock(struct fs
*, uchar_t
*, daddr_t
);
908 extern void clrblock(struct fs
*, uchar_t
*, daddr_t
);
909 extern int isclrblock(struct fs
*, uchar_t
*, daddr_t
);
910 extern void fragacct(struct fs
*, int, int32_t *, int);
911 extern int skpc(char, uint_t
, char *);
912 extern int ufs_fbwrite(struct fbuf
*, struct inode
*);
913 extern int ufs_fbiwrite(struct fbuf
*, struct inode
*, daddr_t
, long);
914 extern int ufs_putapage(struct vnode
*, struct page
*, u_offset_t
*,
915 size_t *, int, struct cred
*);
916 extern inode_t
*ufs_alloc_inode(ufsvfs_t
*, ino_t
);
917 extern void ufs_free_inode(inode_t
*);
/*
 * ufs_setreclaim takes an inode and returns nothing.
 * NOTE(review): presumably flags the inode for the reclaim machinery --
 * confirm against the definition.
 */
extern void ufs_setreclaim(struct inode *);
923 extern int ufs_scan_inodes(int, int (*)(struct inode
*, void *), void *,
/*
 * ufs_sync_inode has the int (*)(struct inode *, void *) shape, i.e. it
 * takes an inode plus an opaque argument and returns an int.
 * NOTE(review): presumably intended as a per-inode callback for an
 * inode scan -- confirm against its callers.
 */
extern int ufs_sync_inode(struct inode *, void *);
926 extern int ufs_sticky_remove_access(struct inode
*, struct inode
*,
931 extern int chkiq(struct ufsvfs
*, int, struct inode
*, uid_t
, int,
932 struct cred
*, char **errp
, size_t *lenp
);
/*
 * Delete/idle/reclaim thread interfaces. Every routine here returns
 * void. The ufs_thread_{init,start,exit,suspend,continue} family
 * operates on a struct ufs_q.
 *
 * ufs_thread_start's second argument is declared as void (*)(), i.e.
 * without a parameter list, so the compiler does not check the
 * arguments the callback is invoked with.
 * NOTE(review): ufs_thread_delete and ufs_thread_reclaim (both taking a
 * struct vfs *) look like the intended callbacks -- confirm before
 * tightening the type.
 */
extern void ufs_thread_delete(struct vfs *);
extern void ufs_delete_drain(struct vfs *, int, int);
extern void ufs_delete(struct ufsvfs *, struct inode *, int);
extern void ufs_inode_cache_reclaim(void *);
extern void ufs_idle_drain(struct vfs *);
extern void ufs_idle_some(int);
extern void ufs_thread_idle(void);
extern void ufs_thread_reclaim(struct vfs *);
extern void ufs_thread_init(struct ufs_q *, int);
extern void ufs_thread_start(struct ufs_q *, void (*)(), struct vfs *);
extern void ufs_thread_exit(struct ufs_q *);
extern void ufs_thread_suspend(struct ufs_q *);
extern void ufs_thread_continue(struct ufs_q *);
extern void ufs_thread_hlock(void *);
extern void ufs_delete_init(struct ufsvfs *, int);
extern void ufs_delete_adjust_stats(struct ufsvfs *, struct statvfs64 *);
extern void ufs_delete_drain_wait(struct ufsvfs *, int);
959 extern int ufs_reconcile_fs(struct vfs
*, struct ufsvfs
*, int);
960 extern int ufs_quiesce(struct ulockfs
*);
961 extern int ufs_flush(struct vfs
*);
962 extern int ufs_fiolfs(struct vnode
*, struct lockfs
*, int);
963 extern int ufs__fiolfs(struct vnode
*, struct lockfs
*, int, int);
964 extern int ufs_fiolfss(struct vnode
*, struct lockfs
*);
965 extern int ufs_fioffs(struct vnode
*, char *, struct cred
*);
966 extern int ufs_check_lockfs(struct ufsvfs
*, struct ulockfs
*, ulong_t
);
967 extern int ufs_lockfs_begin(struct ufsvfs
*, struct ulockfs
**, ulong_t
);
968 extern int ufs_lockfs_trybegin(struct ufsvfs
*, struct ulockfs
**, ulong_t
);
969 extern int ufs_lockfs_begin_getpage(struct ufsvfs
*, struct ulockfs
**,
970 struct seg
*, int, uint_t
*);
971 extern void ufs_lockfs_end(struct ulockfs
*);
975 extern int ufs_si_inherit(struct inode
*, struct inode
*, o_mode_t
, cred_t
*);
976 extern void si_cache_init(void);
977 extern int ufs_si_load(struct inode
*, cred_t
*);
978 extern void ufs_si_del(struct inode
*);
979 extern int ufs_acl_access(struct inode
*, int, cred_t
*);
980 extern void ufs_si_cache_flush(dev_t
);
981 extern int ufs_si_free(si_t
*, struct vfs
*, cred_t
*);
982 extern int ufs_acl_setattr(struct inode
*, struct vattr
*, cred_t
*);
983 extern int ufs_acl_get(struct inode
*, vsecattr_t
*, int, cred_t
*);
984 extern int ufs_acl_set(struct inode
*, vsecattr_t
*, int, cred_t
*);
/*
 * ufs_directio_init takes no arguments and returns nothing.
 *
 * Declared with an explicit (void): before C23 an empty parameter list
 * is an old-style declaration that leaves the arguments unspecified, so
 * the compiler would silently accept calls that pass arguments.
 */
extern void ufs_directio_init(void);
989 extern int ufs_directio_write(struct inode
*, uio_t
*, int, int, cred_t
*,
991 extern int ufs_directio_read(struct inode
*, uio_t
*, cred_t
*, int *);
/*
 * Direct I/O status values: failure is 0, success is 1.
 * NOTE(review): presumably reported through the int * argument of the
 * ufs_directio_* routines rather than as their return value -- confirm.
 */
#define DIRECTIO_FAILURE (0)
#define DIRECTIO_SUCCESS (1)
996 * ufs extensions for PXFS
999 int ufs_rdwr_data(vnode_t
*vp
, u_offset_t offset
, size_t len
, fdbuffer_t
*fdb
,
1000 int flags
, cred_t
*cr
);
1001 int ufs_alloc_data(vnode_t
*vp
, u_offset_t offset
, size_t *len
, fdbuffer_t
*fdb
,
1002 int flags
, cred_t
*cr
);
/*
 * prototypes to support the forced unmount
 *
 * ufs_freeze returns nothing; ufs_thaw returns an int.
 * NOTE(review): the int is presumably an errno-style status -- confirm
 * against the definition.
 */
void ufs_freeze(struct ulockfs *, struct lockfs *);
int ufs_thaw(struct vfs *, struct ufsvfs *, struct ulockfs *);
1012 * extended attributes
1015 int ufs_xattrmkdir(inode_t
*, inode_t
**, int, struct cred
*);
1016 int ufs_xattr_getattrdir(vnode_t
*, inode_t
**, int, struct cred
*);
1017 void ufs_unhook_shadow(inode_t
*, inode_t
*);
1019 #endif /* defined(_KERNEL) && !defined(_BOOT) */
1025 #endif /* _SYS_FS_UFS_INODE_H */