added base src
[xv6-db.git] / fs.c
blob2d519b0b42c8f0b50536c6c6de64516f525a8d6e
1 // File system implementation. Four layers:
2 // + Blocks: allocator for raw disk blocks.
3 // + Files: inode allocator, reading, writing, metadata.
4 // + Directories: inode with special contents (list of other inodes!)
5 // + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
6 //
7 // Disk layout is: superblock, inodes, block in-use bitmap, data blocks.
8 //
9 // This file contains the low-level file system manipulation
10 // routines. The (higher-level) system call implementations
11 // are in sysfile.c.
13 #include "types.h"
14 #include "defs.h"
15 #include "param.h"
16 #include "stat.h"
17 #include "mmu.h"
18 #include "proc.h"
19 #include "spinlock.h"
20 #include "buf.h"
21 #include "fs.h"
22 #include "file.h"
// Smaller of two values. Being a macro, it evaluates each argument
// more than once, so do not pass expressions with side effects.
#define min(x, y) ((x) < (y) ? (x) : (y))
25 static void itrunc(struct inode*);
27 // Read the super block.
28 static void
29 readsb(int dev, struct superblock *sb)
31 struct buf *bp;
33 bp = bread(dev, 1);
34 memmove(sb, bp->data, sizeof(*sb));
35 brelse(bp);
38 // Zero a block.
39 static void
40 bzero(int dev, int bno)
42 struct buf *bp;
44 bp = bread(dev, bno);
45 memset(bp->data, 0, BSIZE);
46 bwrite(bp);
47 brelse(bp);
50 // Blocks.
52 // Allocate a disk block.
53 static uint
54 balloc(uint dev)
56 int b, bi, m;
57 struct buf *bp;
58 struct superblock sb;
60 bp = 0;
61 readsb(dev, &sb);
62 for(b = 0; b < sb.size; b += BPB){
63 bp = bread(dev, BBLOCK(b, sb.ninodes));
64 for(bi = 0; bi < BPB; bi++){
65 m = 1 << (bi % 8);
66 if((bp->data[bi/8] & m) == 0){ // Is block free?
67 bp->data[bi/8] |= m; // Mark block in use on disk.
68 bwrite(bp);
69 brelse(bp);
70 return b + bi;
73 brelse(bp);
75 panic("balloc: out of blocks");
78 // Free a disk block.
79 static void
80 bfree(int dev, uint b)
82 struct buf *bp;
83 struct superblock sb;
84 int bi, m;
86 bzero(dev, b);
88 readsb(dev, &sb);
89 bp = bread(dev, BBLOCK(b, sb.ninodes));
90 bi = b % BPB;
91 m = 1 << (bi % 8);
92 if((bp->data[bi/8] & m) == 0)
93 panic("freeing free block");
94 bp->data[bi/8] &= ~m; // Mark block free on disk.
95 bwrite(bp);
96 brelse(bp);
99 // Inodes.
101 // An inode is a single, unnamed file in the file system.
102 // The inode disk structure holds metadata (the type, device numbers,
103 // and data size) along with a list of blocks where the associated
104 // data can be found.
106 // The inodes are laid out sequentially on disk immediately after
107 // the superblock. The kernel keeps a cache of the in-use
108 // on-disk structures to provide a place for synchronizing access
109 // to inodes shared between multiple processes.
111 // ip->ref counts the number of pointer references to this cached
112 // inode; references are typically kept in struct file and in proc->cwd.
113 // When ip->ref falls to zero, the inode is no longer cached.
114 // It is an error to use an inode without holding a reference to it.
116 // Processes are only allowed to read and write inode
117 // metadata and contents when holding the inode's lock,
118 // represented by the I_BUSY flag in the in-memory copy.
119 // Because inode locks are held during disk accesses,
120 // they are implemented using a flag rather than with
121 // spin locks. Callers are responsible for locking
122 // inodes before passing them to routines in this file; leaving
123 // this responsibility with the caller makes it possible for them
124 // to create arbitrarily-sized atomic operations.
126 // To give maximum control over locking to the callers,
127 // the routines in this file that return inode pointers
128 // return pointers to *unlocked* inodes. It is the callers'
129 // responsibility to lock them before using them. A non-zero
130 // ip->ref keeps these unlocked inodes in the cache.
132 struct {
133 struct spinlock lock;
134 struct inode inode[NINODE];
135 } icache;
137 void
138 iinit(void)
140 initlock(&icache.lock, "icache");
143 static struct inode* iget(uint dev, uint inum);
145 // Allocate a new inode with the given type on device dev.
146 struct inode*
147 ialloc(uint dev, short type)
149 int inum;
150 struct buf *bp;
151 struct dinode *dip;
152 struct superblock sb;
154 readsb(dev, &sb);
155 for(inum = 1; inum < sb.ninodes; inum++){ // loop over inode blocks
156 bp = bread(dev, IBLOCK(inum));
157 dip = (struct dinode*)bp->data + inum%IPB;
158 if(dip->type == 0){ // a free inode
159 memset(dip, 0, sizeof(*dip));
160 dip->type = type;
161 bwrite(bp); // mark it allocated on the disk
162 brelse(bp);
163 return iget(dev, inum);
165 brelse(bp);
167 panic("ialloc: no inodes");
170 // Copy inode, which has changed, from memory to disk.
171 void
172 iupdate(struct inode *ip)
174 struct buf *bp;
175 struct dinode *dip;
177 bp = bread(ip->dev, IBLOCK(ip->inum));
178 dip = (struct dinode*)bp->data + ip->inum%IPB;
179 dip->type = ip->type;
180 dip->major = ip->major;
181 dip->minor = ip->minor;
182 dip->nlink = ip->nlink;
183 dip->size = ip->size;
184 memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
185 bwrite(bp);
186 brelse(bp);
189 // Find the inode with number inum on device dev
190 // and return the in-memory copy.
191 static struct inode*
192 iget(uint dev, uint inum)
194 struct inode *ip, *empty;
196 acquire(&icache.lock);
198 // Try for cached inode.
199 empty = 0;
200 for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
201 if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
202 ip->ref++;
203 release(&icache.lock);
204 return ip;
206 if(empty == 0 && ip->ref == 0) // Remember empty slot.
207 empty = ip;
210 // Allocate fresh inode.
211 if(empty == 0)
212 panic("iget: no inodes");
214 ip = empty;
215 ip->dev = dev;
216 ip->inum = inum;
217 ip->ref = 1;
218 ip->flags = 0;
219 release(&icache.lock);
221 return ip;
224 // Increment reference count for ip.
225 // Returns ip to enable ip = idup(ip1) idiom.
226 struct inode*
227 idup(struct inode *ip)
229 acquire(&icache.lock);
230 ip->ref++;
231 release(&icache.lock);
232 return ip;
235 // Lock the given inode.
236 void
237 ilock(struct inode *ip)
239 struct buf *bp;
240 struct dinode *dip;
242 if(ip == 0 || ip->ref < 1)
243 panic("ilock");
245 acquire(&icache.lock);
246 while(ip->flags & I_BUSY)
247 sleep(ip, &icache.lock);
248 ip->flags |= I_BUSY;
249 release(&icache.lock);
251 if(!(ip->flags & I_VALID)){
252 bp = bread(ip->dev, IBLOCK(ip->inum));
253 dip = (struct dinode*)bp->data + ip->inum%IPB;
254 ip->type = dip->type;
255 ip->major = dip->major;
256 ip->minor = dip->minor;
257 ip->nlink = dip->nlink;
258 ip->size = dip->size;
259 memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
260 brelse(bp);
261 ip->flags |= I_VALID;
262 if(ip->type == 0)
263 panic("ilock: no type");
267 // Unlock the given inode.
268 void
269 iunlock(struct inode *ip)
271 if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1)
272 panic("iunlock");
274 acquire(&icache.lock);
275 ip->flags &= ~I_BUSY;
276 wakeup(ip);
277 release(&icache.lock);
280 // Caller holds reference to unlocked ip. Drop reference.
281 void
282 iput(struct inode *ip)
284 acquire(&icache.lock);
285 if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){
286 // inode is no longer used: truncate and free inode.
287 if(ip->flags & I_BUSY)
288 panic("iput busy");
289 ip->flags |= I_BUSY;
290 release(&icache.lock);
291 itrunc(ip);
292 ip->type = 0;
293 iupdate(ip);
294 acquire(&icache.lock);
295 ip->flags = 0;
296 wakeup(ip);
298 ip->ref--;
299 release(&icache.lock);
// Convenience wrapper for the frequent pair: unlock ip, then
// drop the caller's reference to it.
void
iunlockput(struct inode *ip)
{
  iunlock(ip);
  iput(ip);
}
310 // Inode contents
312 // The contents (data) associated with each inode is stored
313 // in a sequence of blocks on the disk. The first NDIRECT blocks
314 // are listed in ip->addrs[]. The next NINDIRECT blocks are
315 // listed in the block ip->addrs[NDIRECT].
317 // Return the disk block address of the nth block in inode ip.
318 // If there is no such block, bmap allocates one.
319 static uint
320 bmap(struct inode *ip, uint bn)
322 uint addr, *a;
323 struct buf *bp;
325 if(bn < NDIRECT){
326 if((addr = ip->addrs[bn]) == 0)
327 ip->addrs[bn] = addr = balloc(ip->dev);
328 return addr;
330 bn -= NDIRECT;
332 if(bn < NINDIRECT){
333 // Load indirect block, allocating if necessary.
334 if((addr = ip->addrs[NDIRECT]) == 0)
335 ip->addrs[NDIRECT] = addr = balloc(ip->dev);
336 bp = bread(ip->dev, addr);
337 a = (uint*)bp->data;
338 if((addr = a[bn]) == 0){
339 a[bn] = addr = balloc(ip->dev);
340 bwrite(bp);
342 brelse(bp);
343 return addr;
346 panic("bmap: out of range");
349 // Truncate inode (discard contents).
350 // Only called after the last dirent referring
351 // to this inode has been erased on disk.
352 static void
353 itrunc(struct inode *ip)
355 int i, j;
356 struct buf *bp;
357 uint *a;
359 for(i = 0; i < NDIRECT; i++){
360 if(ip->addrs[i]){
361 bfree(ip->dev, ip->addrs[i]);
362 ip->addrs[i] = 0;
366 if(ip->addrs[NDIRECT]){
367 bp = bread(ip->dev, ip->addrs[NDIRECT]);
368 a = (uint*)bp->data;
369 for(j = 0; j < NINDIRECT; j++){
370 if(a[j])
371 bfree(ip->dev, a[j]);
373 brelse(bp);
374 bfree(ip->dev, ip->addrs[NDIRECT]);
375 ip->addrs[NDIRECT] = 0;
378 ip->size = 0;
379 iupdate(ip);
382 // Copy stat information from inode.
383 void
384 stati(struct inode *ip, struct stat *st)
386 st->dev = ip->dev;
387 st->ino = ip->inum;
388 st->type = ip->type;
389 st->nlink = ip->nlink;
390 st->size = ip->size;
393 // Read data from inode.
395 readi(struct inode *ip, char *dst, uint off, uint n)
397 uint tot, m;
398 struct buf *bp;
400 if(ip->type == T_DEV){
401 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
402 return -1;
403 return devsw[ip->major].read(ip, dst, n);
406 if(off > ip->size || off + n < off)
407 return -1;
408 if(off + n > ip->size)
409 n = ip->size - off;
411 for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
412 bp = bread(ip->dev, bmap(ip, off/BSIZE));
413 m = min(n - tot, BSIZE - off%BSIZE);
414 memmove(dst, bp->data + off%BSIZE, m);
415 brelse(bp);
417 return n;
420 // Write data to inode.
422 writei(struct inode *ip, char *src, uint off, uint n)
424 uint tot, m;
425 struct buf *bp;
427 if(ip->type == T_DEV){
428 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
429 return -1;
430 return devsw[ip->major].write(ip, src, n);
433 if(off > ip->size || off + n < off)
434 return -1;
435 if(off + n > MAXFILE*BSIZE)
436 n = MAXFILE*BSIZE - off;
438 for(tot=0; tot<n; tot+=m, off+=m, src+=m){
439 bp = bread(ip->dev, bmap(ip, off/BSIZE));
440 m = min(n - tot, BSIZE - off%BSIZE);
441 memmove(bp->data + off%BSIZE, src, m);
442 bwrite(bp);
443 brelse(bp);
446 if(n > 0 && off > ip->size){
447 ip->size = off;
448 iupdate(ip);
450 return n;
453 // Directories
456 namecmp(const char *s, const char *t)
458 return strncmp(s, t, DIRSIZ);
461 // Look for a directory entry in a directory.
462 // If found, set *poff to byte offset of entry.
463 // Caller must have already locked dp.
464 struct inode*
465 dirlookup(struct inode *dp, char *name, uint *poff)
467 uint off, inum;
468 struct buf *bp;
469 struct dirent *de;
471 if(dp->type != T_DIR)
472 panic("dirlookup not DIR");
474 for(off = 0; off < dp->size; off += BSIZE){
475 bp = bread(dp->dev, bmap(dp, off / BSIZE));
476 for(de = (struct dirent*)bp->data;
477 de < (struct dirent*)(bp->data + BSIZE);
478 de++){
479 if(de->inum == 0)
480 continue;
481 if(namecmp(name, de->name) == 0){
482 // entry matches path element
483 if(poff)
484 *poff = off + (uchar*)de - bp->data;
485 inum = de->inum;
486 brelse(bp);
487 return iget(dp->dev, inum);
490 brelse(bp);
492 return 0;
495 // Write a new directory entry (name, inum) into the directory dp.
497 dirlink(struct inode *dp, char *name, uint inum)
499 int off;
500 struct dirent de;
501 struct inode *ip;
503 // Check that name is not present.
504 if((ip = dirlookup(dp, name, 0)) != 0){
505 iput(ip);
506 return -1;
509 // Look for an empty dirent.
510 for(off = 0; off < dp->size; off += sizeof(de)){
511 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
512 panic("dirlink read");
513 if(de.inum == 0)
514 break;
517 strncpy(de.name, name, DIRSIZ);
518 de.inum = inum;
519 if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
520 panic("dirlink");
522 return 0;
525 // Paths
527 // Copy the next path element from path into name.
528 // Return a pointer to the element following the copied one.
529 // The returned path has no leading slashes,
530 // so the caller can check *path=='\0' to see if the name is the last one.
531 // If no name to remove, return 0.
533 // Examples:
534 // skipelem("a/bb/c", name) = "bb/c", setting name = "a"
535 // skipelem("///a//bb", name) = "bb", setting name = "a"
536 // skipelem("a", name) = "", setting name = "a"
537 // skipelem("", name) = skipelem("////", name) = 0
539 static char*
540 skipelem(char *path, char *name)
542 char *s;
543 int len;
545 while(*path == '/')
546 path++;
547 if(*path == 0)
548 return 0;
549 s = path;
550 while(*path != '/' && *path != 0)
551 path++;
552 len = path - s;
553 if(len >= DIRSIZ)
554 memmove(name, s, DIRSIZ);
555 else {
556 memmove(name, s, len);
557 name[len] = 0;
559 while(*path == '/')
560 path++;
561 return path;
564 // Look up and return the inode for a path name.
565 // If parent != 0, return the inode for the parent and copy the final
566 // path element into name, which must have room for DIRSIZ bytes.
567 static struct inode*
568 namex(char *path, int nameiparent, char *name)
570 struct inode *ip, *next;
572 if(*path == '/')
573 ip = iget(ROOTDEV, ROOTINO);
574 else
575 ip = idup(proc->cwd);
577 while((path = skipelem(path, name)) != 0){
578 ilock(ip);
579 if(ip->type != T_DIR){
580 iunlockput(ip);
581 return 0;
583 if(nameiparent && *path == '\0'){
584 // Stop one level early.
585 iunlock(ip);
586 return ip;
588 if((next = dirlookup(ip, name, 0)) == 0){
589 iunlockput(ip);
590 return 0;
592 iunlockput(ip);
593 ip = next;
595 if(nameiparent){
596 iput(ip);
597 return 0;
599 return ip;
602 struct inode*
603 namei(char *path)
605 char name[DIRSIZ];
606 return namex(path, 0, name);
// Return the inode of the directory containing the last element of
// path and copy that last element into name (room for DIRSIZ bytes).
// Returns 0 if the lookup fails.
struct inode*
nameiparent(char *path, char *name)
{
  return namex(path, 1, name);
}