1 // File system implementation. Four layers:
2 // + Blocks: allocator for raw disk blocks.
3 // + Files: inode allocator, reading, writing, metadata.
4 // + Directories: inode with special contents (list of other inodes!)
5 // + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
7 // Disk layout is: superblock, inodes, block in-use bitmap, data blocks.
9 // This file contains the low-level file system manipulation
10 // routines. The (higher-level) system call implementations
24 #define min(a, b) ((a) < (b) ? (a) : (b))
25 static void itrunc(struct inode
*);
27 // Read the super block.
29 readsb(int dev
, struct superblock
*sb
)
34 memmove(sb
, bp
->data
, sizeof(*sb
));
40 bzero(int dev
, int bno
)
45 memset(bp
->data
, 0, BSIZE
);
52 // Allocate a disk block.
62 for(b
= 0; b
< sb
.size
; b
+= BPB
){
63 bp
= bread(dev
, BBLOCK(b
, sb
.ninodes
));
64 for(bi
= 0; bi
< BPB
; bi
++){
66 if((bp
->data
[bi
/8] & m
) == 0){ // Is block free?
67 bp
->data
[bi
/8] |= m
; // Mark block in use on disk.
75 panic("balloc: out of blocks");
80 bfree(int dev
, uint b
)
89 bp
= bread(dev
, BBLOCK(b
, sb
.ninodes
));
92 if((bp
->data
[bi
/8] & m
) == 0)
93 panic("freeing free block");
94 bp
->data
[bi
/8] &= ~m
; // Mark block free on disk.
101 // An inode is a single, unnamed file in the file system.
102 // The inode disk structure holds metadata (the type, device numbers,
103 // and data size) along with a list of blocks where the associated
104 // data can be found.
106 // The inodes are laid out sequentially on disk immediately after
107 // the superblock. The kernel keeps a cache of the in-use
108 // on-disk structures to provide a place for synchronizing access
109 // to inodes shared between multiple processes.
111 // ip->ref counts the number of pointer references to this cached
112 // inode; references are typically kept in struct file and in proc->cwd.
113 // When ip->ref falls to zero, the inode is no longer cached.
114 // It is an error to use an inode without holding a reference to it.
116 // Processes are only allowed to read and write inode
117 // metadata and contents when holding the inode's lock,
118 // represented by the I_BUSY flag in the in-memory copy.
119 // Because inode locks are held during disk accesses,
120 // they are implemented using a flag rather than with
121 // spin locks. Callers are responsible for locking
122 // inodes before passing them to routines in this file; leaving
123 // this responsibility with the caller makes it possible for them
124 // to create arbitrarily-sized atomic operations.
126 // To give maximum control over locking to the callers,
127 // the routines in this file that return inode pointers
128 // return pointers to *unlocked* inodes. It is the callers'
129 // responsibility to lock them before using them. A non-zero
130 // ip->ref keeps these unlocked inodes in the cache.
133 struct spinlock lock
;
134 struct inode inode
[NINODE
];
140 initlock(&icache
.lock
, "icache");
143 static struct inode
* iget(uint dev
, uint inum
);
145 // Allocate a new inode with the given type on device dev.
147 ialloc(uint dev
, short type
)
152 struct superblock sb
;
155 for(inum
= 1; inum
< sb
.ninodes
; inum
++){ // loop over inode blocks
156 bp
= bread(dev
, IBLOCK(inum
));
157 dip
= (struct dinode
*)bp
->data
+ inum
%IPB
;
158 if(dip
->type
== 0){ // a free inode
159 memset(dip
, 0, sizeof(*dip
));
161 bwrite(bp
); // mark it allocated on the disk
163 return iget(dev
, inum
);
167 panic("ialloc: no inodes");
170 // Copy inode, which has changed, from memory to disk.
172 iupdate(struct inode
*ip
)
177 bp
= bread(ip
->dev
, IBLOCK(ip
->inum
));
178 dip
= (struct dinode
*)bp
->data
+ ip
->inum
%IPB
;
179 dip
->type
= ip
->type
;
180 dip
->major
= ip
->major
;
181 dip
->minor
= ip
->minor
;
182 dip
->nlink
= ip
->nlink
;
183 dip
->size
= ip
->size
;
184 memmove(dip
->addrs
, ip
->addrs
, sizeof(ip
->addrs
));
189 // Find the inode with number inum on device dev
190 // and return the in-memory copy.
192 iget(uint dev
, uint inum
)
194 struct inode
*ip
, *empty
;
196 acquire(&icache
.lock
);
198 // Try for cached inode.
200 for(ip
= &icache
.inode
[0]; ip
< &icache
.inode
[NINODE
]; ip
++){
201 if(ip
->ref
> 0 && ip
->dev
== dev
&& ip
->inum
== inum
){
203 release(&icache
.lock
);
206 if(empty
== 0 && ip
->ref
== 0) // Remember empty slot.
210 // Allocate fresh inode.
212 panic("iget: no inodes");
219 release(&icache
.lock
);
224 // Increment reference count for ip.
225 // Returns ip to enable ip = idup(ip1) idiom.
227 idup(struct inode
*ip
)
229 acquire(&icache
.lock
);
231 release(&icache
.lock
);
235 // Lock the given inode.
237 ilock(struct inode
*ip
)
242 if(ip
== 0 || ip
->ref
< 1)
245 acquire(&icache
.lock
);
246 while(ip
->flags
& I_BUSY
)
247 sleep(ip
, &icache
.lock
);
249 release(&icache
.lock
);
251 if(!(ip
->flags
& I_VALID
)){
252 bp
= bread(ip
->dev
, IBLOCK(ip
->inum
));
253 dip
= (struct dinode
*)bp
->data
+ ip
->inum
%IPB
;
254 ip
->type
= dip
->type
;
255 ip
->major
= dip
->major
;
256 ip
->minor
= dip
->minor
;
257 ip
->nlink
= dip
->nlink
;
258 ip
->size
= dip
->size
;
259 memmove(ip
->addrs
, dip
->addrs
, sizeof(ip
->addrs
));
261 ip
->flags
|= I_VALID
;
263 panic("ilock: no type");
267 // Unlock the given inode.
269 iunlock(struct inode
*ip
)
271 if(ip
== 0 || !(ip
->flags
& I_BUSY
) || ip
->ref
< 1)
274 acquire(&icache
.lock
);
275 ip
->flags
&= ~I_BUSY
;
277 release(&icache
.lock
);
280 // Caller holds reference to unlocked ip. Drop reference.
282 iput(struct inode
*ip
)
284 acquire(&icache
.lock
);
285 if(ip
->ref
== 1 && (ip
->flags
& I_VALID
) && ip
->nlink
== 0){
286 // inode is no longer used: truncate and free inode.
287 if(ip
->flags
& I_BUSY
)
290 release(&icache
.lock
);
294 acquire(&icache
.lock
);
299 release(&icache
.lock
);
302 // Common idiom: unlock, then put.
304 iunlockput(struct inode
*ip
)
312 // The contents (data) associated with each inode is stored
313 // in a sequence of blocks on the disk. The first NDIRECT blocks
314 // are listed in ip->addrs[]. The next NINDIRECT blocks are
315 // listed in the block ip->addrs[NDIRECT].
317 // Return the disk block address of the nth block in inode ip.
318 // If there is no such block, bmap allocates one.
320 bmap(struct inode
*ip
, uint bn
)
326 if((addr
= ip
->addrs
[bn
]) == 0)
327 ip
->addrs
[bn
] = addr
= balloc(ip
->dev
);
333 // Load indirect block, allocating if necessary.
334 if((addr
= ip
->addrs
[NDIRECT
]) == 0)
335 ip
->addrs
[NDIRECT
] = addr
= balloc(ip
->dev
);
336 bp
= bread(ip
->dev
, addr
);
338 if((addr
= a
[bn
]) == 0){
339 a
[bn
] = addr
= balloc(ip
->dev
);
346 panic("bmap: out of range");
349 // Truncate inode (discard contents).
350 // Only called after the last dirent referring
351 // to this inode has been erased on disk.
353 itrunc(struct inode
*ip
)
359 for(i
= 0; i
< NDIRECT
; i
++){
361 bfree(ip
->dev
, ip
->addrs
[i
]);
366 if(ip
->addrs
[NDIRECT
]){
367 bp
= bread(ip
->dev
, ip
->addrs
[NDIRECT
]);
369 for(j
= 0; j
< NINDIRECT
; j
++){
371 bfree(ip
->dev
, a
[j
]);
374 bfree(ip
->dev
, ip
->addrs
[NDIRECT
]);
375 ip
->addrs
[NDIRECT
] = 0;
382 // Copy stat information from inode.
384 stati(struct inode
*ip
, struct stat
*st
)
389 st
->nlink
= ip
->nlink
;
393 // Read data from inode.
395 readi(struct inode
*ip
, char *dst
, uint off
, uint n
)
400 if(ip
->type
== T_DEV
){
401 if(ip
->major
< 0 || ip
->major
>= NDEV
|| !devsw
[ip
->major
].read
)
403 return devsw
[ip
->major
].read(ip
, dst
, n
);
406 if(off
> ip
->size
|| off
+ n
< off
)
408 if(off
+ n
> ip
->size
)
411 for(tot
=0; tot
<n
; tot
+=m
, off
+=m
, dst
+=m
){
412 bp
= bread(ip
->dev
, bmap(ip
, off
/BSIZE
));
413 m
= min(n
- tot
, BSIZE
- off
%BSIZE
);
414 memmove(dst
, bp
->data
+ off
%BSIZE
, m
);
420 // Write data to inode.
422 writei(struct inode
*ip
, char *src
, uint off
, uint n
)
427 if(ip
->type
== T_DEV
){
428 if(ip
->major
< 0 || ip
->major
>= NDEV
|| !devsw
[ip
->major
].write
)
430 return devsw
[ip
->major
].write(ip
, src
, n
);
433 if(off
> ip
->size
|| off
+ n
< off
)
435 if(off
+ n
> MAXFILE
*BSIZE
)
436 n
= MAXFILE
*BSIZE
- off
;
438 for(tot
=0; tot
<n
; tot
+=m
, off
+=m
, src
+=m
){
439 bp
= bread(ip
->dev
, bmap(ip
, off
/BSIZE
));
440 m
= min(n
- tot
, BSIZE
- off
%BSIZE
);
441 memmove(bp
->data
+ off
%BSIZE
, src
, m
);
446 if(n
> 0 && off
> ip
->size
){
456 namecmp(const char *s
, const char *t
)
458 return strncmp(s
, t
, DIRSIZ
);
461 // Look for a directory entry in a directory.
462 // If found, set *poff to byte offset of entry.
463 // Caller must have already locked dp.
465 dirlookup(struct inode
*dp
, char *name
, uint
*poff
)
471 if(dp
->type
!= T_DIR
)
472 panic("dirlookup not DIR");
474 for(off
= 0; off
< dp
->size
; off
+= BSIZE
){
475 bp
= bread(dp
->dev
, bmap(dp
, off
/ BSIZE
));
476 for(de
= (struct dirent
*)bp
->data
;
477 de
< (struct dirent
*)(bp
->data
+ BSIZE
);
481 if(namecmp(name
, de
->name
) == 0){
482 // entry matches path element
484 *poff
= off
+ (uchar
*)de
- bp
->data
;
487 return iget(dp
->dev
, inum
);
495 // Write a new directory entry (name, inum) into the directory dp.
497 dirlink(struct inode
*dp
, char *name
, uint inum
)
503 // Check that name is not present.
504 if((ip
= dirlookup(dp
, name
, 0)) != 0){
509 // Look for an empty dirent.
510 for(off
= 0; off
< dp
->size
; off
+= sizeof(de
)){
511 if(readi(dp
, (char*)&de
, off
, sizeof(de
)) != sizeof(de
))
512 panic("dirlink read");
517 strncpy(de
.name
, name
, DIRSIZ
);
519 if(writei(dp
, (char*)&de
, off
, sizeof(de
)) != sizeof(de
))
527 // Copy the next path element from path into name.
528 // Return a pointer to the element following the copied one.
529 // The returned path has no leading slashes,
530 // so the caller can check *path=='\0' to see if the name is the last one.
531 // If no name to remove, return 0.
534 // skipelem("a/bb/c", name) = "bb/c", setting name = "a"
535 // skipelem("///a//bb", name) = "bb", setting name = "a"
536 // skipelem("a", name) = "", setting name = "a"
537 // skipelem("", name) = skipelem("////", name) = 0
540 skipelem(char *path
, char *name
)
550 while(*path
!= '/' && *path
!= 0)
554 memmove(name
, s
, DIRSIZ
);
556 memmove(name
, s
, len
);
564 // Look up and return the inode for a path name.
565 // If parent != 0, return the inode for the parent and copy the final
566 // path element into name, which must have room for DIRSIZ bytes.
568 namex(char *path
, int nameiparent
, char *name
)
570 struct inode
*ip
, *next
;
573 ip
= iget(ROOTDEV
, ROOTINO
);
575 ip
= idup(proc
->cwd
);
577 while((path
= skipelem(path
, name
)) != 0){
579 if(ip
->type
!= T_DIR
){
583 if(nameiparent
&& *path
== '\0'){
584 // Stop one level early.
588 if((next
= dirlookup(ip
, name
, 0)) == 0){
606 return namex(path
, 0, name
);
610 nameiparent(char *path
, char *name
)
612 return namex(path
, 1, name
);