1 /* vi: set sw=4 ts=4: */
3 * unix_io.c --- This is the Unix (well, really POSIX) implementation
6 * Implements a one-block write-through cache.
8 * Includes support for Windows NT under Cygwin.
10 * Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
11 * 2002 by Theodore Ts'o.
14 * This file may be redistributed under the terms of the GNU Public
30 #include <sys/utsname.h>
36 #include <sys/types.h>
38 #include <sys/resource.h>
44 * For checking structure magic numbers...
/*
 * EXT2_CHECK_MAGIC(ptr, code): make the enclosing function return `code`
 * when ptr->magic differs from `code`; otherwise execution falls through
 * to the statement after the macro.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement: it needs the trailing semicolon at the call site and can
 * no longer capture an `else` that follows it.  `code` may be evaluated
 * twice (once for the comparison, once for the return), so pass
 * side-effect-free expressions.
 */
#define EXT2_CHECK_MAGIC(ptr, code) \
	do { \
		if ((ptr)->magic != (code)) \
			return (code); \
	} while (0)
59 #define WRITE_DIRECT_SIZE 4 /* Must be smaller than CACHE_SIZE */
60 #define READ_DIRECT_SIZE 4 /* Should be smaller than CACHE_SIZE */
62 struct unix_private_data
{
68 struct unix_cache cache
[CACHE_SIZE
];
71 static errcode_t
unix_open(const char *name
, int flags
, io_channel
*channel
);
72 static errcode_t
unix_close(io_channel channel
);
73 static errcode_t
unix_set_blksize(io_channel channel
, int blksize
);
74 static errcode_t
unix_read_blk(io_channel channel
, unsigned long block
,
75 int count
, void *data
);
76 static errcode_t
unix_write_blk(io_channel channel
, unsigned long block
,
77 int count
, const void *data
);
78 static errcode_t
unix_flush(io_channel channel
);
79 static errcode_t
unix_write_byte(io_channel channel
, unsigned long offset
,
80 int size
, const void *data
);
81 static errcode_t
unix_set_option(io_channel channel
, const char *option
,
84 static void reuse_cache(io_channel channel
, struct unix_private_data
*data
,
85 struct unix_cache
*cache
, unsigned long block
);
87 /* __FreeBSD_kernel__ is defined by GNU/kFreeBSD - the FreeBSD kernel
88 * does not know buffered block devices - everything is raw. */
89 #if defined(__CYGWIN__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
90 #define NEED_BOUNCE_BUFFER
92 #undef NEED_BOUNCE_BUFFER
95 static struct struct_io_manager struct_unix_manager
= {
96 EXT2_ET_MAGIC_IO_MANAGER
,
104 #ifdef NEED_BOUNCE_BUFFER
/* Non-static io_manager handle; it points at the file-local struct_unix_manager. */
112 io_manager unix_io_manager
= &struct_unix_manager
;
115 * Here are the raw I/O functions
117 #ifndef NEED_BOUNCE_BUFFER
118 static errcode_t
raw_read_blk(io_channel channel
,
119 struct unix_private_data
*data
,
121 int count
, void *buf
)
125 ext2_loff_t location
;
128 size
= (count
< 0) ? -count
: count
* channel
->block_size
;
129 location
= ((ext2_loff_t
) block
* channel
->block_size
) + data
->offset
;
130 if (ext2fs_llseek(data
->dev
, location
, SEEK_SET
) != location
) {
131 retval
= errno
? errno
: EXT2_ET_LLSEEK_FAILED
;
134 actual
= read(data
->dev
, buf
, size
);
135 if (actual
!= size
) {
138 retval
= EXT2_ET_SHORT_READ
;
144 memset((char *) buf
+actual
, 0, size
-actual
);
145 if (channel
->read_error
)
146 retval
= (channel
->read_error
)(channel
, block
, count
, buf
,
147 size
, actual
, retval
);
150 #else /* NEED_BOUNCE_BUFFER */
152 * Windows and FreeBSD block devices only allow sector-aligned I/O (both offset and size)
154 static errcode_t
raw_read_blk(io_channel channel
,
155 struct unix_private_data
*data
,
157 int count
, void *buf
)
160 size_t size
, alignsize
, fragment
;
161 ext2_loff_t location
;
162 int total
= 0, actual
;
163 #define BLOCKALIGN 512
164 char sector
[BLOCKALIGN
];
166 size
= (count
< 0) ? -count
: count
* channel
->block_size
;
167 location
= ((ext2_loff_t
) block
* channel
->block_size
) + data
->offset
;
169 printf("count=%d, size=%d, block=%d, blk_size=%d, location=%lx\n",
170 count
, size
, block
, channel
->block_size
, location
);
172 if (ext2fs_llseek(data
->dev
, location
, SEEK_SET
) != location
) {
173 retval
= errno
? errno
: EXT2_ET_LLSEEK_FAILED
;
176 fragment
= size
% BLOCKALIGN
;
177 alignsize
= size
- fragment
;
179 actual
= read(data
->dev
, buf
, alignsize
);
180 if (actual
!= alignsize
)
184 actual
= read(data
->dev
, sector
, BLOCKALIGN
);
185 if (actual
!= BLOCKALIGN
)
187 memcpy(buf
+alignsize
, sector
, fragment
);
194 retval
= EXT2_ET_SHORT_READ
;
197 memset((char *) buf
+total
, 0, size
-actual
);
198 if (channel
->read_error
)
199 retval
= (channel
->read_error
)(channel
, block
, count
, buf
,
200 size
, actual
, retval
);
205 static errcode_t
raw_write_blk(io_channel channel
,
206 struct unix_private_data
*data
,
208 int count
, const void *buf
)
211 ext2_loff_t location
;
216 size
= channel
->block_size
;
221 size
= count
* channel
->block_size
;
224 location
= ((ext2_loff_t
) block
* channel
->block_size
) + data
->offset
;
225 if (ext2fs_llseek(data
->dev
, location
, SEEK_SET
) != location
) {
226 retval
= errno
? errno
: EXT2_ET_LLSEEK_FAILED
;
230 actual
= write(data
->dev
, buf
, size
);
231 if (actual
!= size
) {
232 retval
= EXT2_ET_SHORT_WRITE
;
238 if (channel
->write_error
)
239 retval
= (channel
->write_error
)(channel
, block
, count
, buf
,
240 size
, actual
, retval
);
246 * Here we implement the cache functions
249 /* Allocate the cache buffers */
250 static errcode_t
alloc_cache(io_channel channel
,
251 struct unix_private_data
*data
)
254 struct unix_cache
*cache
;
257 data
->access_time
= 0;
258 for (i
=0, cache
= data
->cache
; i
< CACHE_SIZE
; i
++, cache
++) {
260 cache
->access_time
= 0;
263 if ((retval
= ext2fs_get_mem(channel
->block_size
,
270 /* Free the cache buffers */
271 static void free_cache(struct unix_private_data
*data
)
273 struct unix_cache
*cache
;
276 data
->access_time
= 0;
277 for (i
=0, cache
= data
->cache
; i
< CACHE_SIZE
; i
++, cache
++) {
279 cache
->access_time
= 0;
282 ext2fs_free_mem(&cache
->buf
);
289 * Try to find a block in the cache. If the block is not found, and
290 * eldest is a non-zero pointer, then fill in eldest with the cache
291 * entry that should be reused.
293 static struct unix_cache
*find_cached_block(struct unix_private_data
*data
,
295 struct unix_cache
**eldest
)
297 struct unix_cache
*cache
, *unused_cache
, *oldest_cache
;
300 unused_cache
= oldest_cache
= 0;
301 for (i
=0, cache
= data
->cache
; i
< CACHE_SIZE
; i
++, cache
++) {
302 if (!cache
->in_use
) {
304 unused_cache
= cache
;
307 if (cache
->block
== block
) {
308 cache
->access_time
= ++data
->access_time
;
312 (cache
->access_time
< oldest_cache
->access_time
))
313 oldest_cache
= cache
;
316 *eldest
= (unused_cache
) ? unused_cache
: oldest_cache
;
321 * Reuse a particular cache entry for another block.
323 static void reuse_cache(io_channel channel
, struct unix_private_data
*data
,
324 struct unix_cache
*cache
, unsigned long block
)
326 if (cache
->dirty
&& cache
->in_use
)
327 raw_write_blk(channel
, data
, cache
->block
, 1, cache
->buf
);
331 cache
->block
= block
;
332 cache
->access_time
= ++data
->access_time
;
336 * Flush all of the blocks in the cache
338 static errcode_t
flush_cached_blocks(io_channel channel
,
339 struct unix_private_data
*data
,
343 struct unix_cache
*cache
;
344 errcode_t retval
, retval2
;
348 for (i
=0, cache
= data
->cache
; i
< CACHE_SIZE
; i
++, cache
++) {
358 retval
= raw_write_blk(channel
, data
,
359 cache
->block
, 1, cache
->buf
);
367 #endif /* NO_IO_CACHE */
369 static errcode_t
unix_open(const char *name
, int flags
, io_channel
*channel
)
371 io_channel io
= NULL
;
372 struct unix_private_data
*data
= NULL
;
381 return EXT2_ET_BAD_DEVICE_NAME
;
382 retval
= ext2fs_get_mem(sizeof(struct struct_io_channel
), &io
);
385 memset(io
, 0, sizeof(struct struct_io_channel
));
386 io
->magic
= EXT2_ET_MAGIC_IO_CHANNEL
;
387 retval
= ext2fs_get_mem(sizeof(struct unix_private_data
), &data
);
391 io
->manager
= unix_io_manager
;
392 retval
= ext2fs_get_mem(strlen(name
)+1, &io
->name
);
396 strcpy(io
->name
, name
);
397 io
->private_data
= data
;
398 io
->block_size
= 1024;
403 memset(data
, 0, sizeof(struct unix_private_data
));
404 data
->magic
= EXT2_ET_MAGIC_UNIX_IO_CHANNEL
;
406 if ((retval
= alloc_cache(io
, data
)))
409 open_flags
= (flags
& IO_FLAG_RW
) ? O_RDWR
: O_RDONLY
;
411 data
->dev
= open64(io
->name
, open_flags
);
413 data
->dev
= open(io
->name
, open_flags
);
422 #if (defined(__alpha__) || (defined(__sparc__) && (__WORDSIZE == 32)) || (defined(__mips__) && (_MIPS_SZLONG == 32)))
423 #define RLIM_INFINITY ((unsigned long)(~0UL>>1))
425 #define RLIM_INFINITY (~0UL)
428 * Work around a bug in 2.4.10-2.4.18 kernels where writes to
429 * block devices are wrongly getting hit by the filesize
430 * limit. This workaround isn't perfect, since it won't work
431 * if glibc wasn't built against 2.2 header files. (Sigh.)
434 if ((flags
& IO_FLAG_RW
) &&
436 ((ut
.release
[0] == '2') && (ut
.release
[1] == '.') &&
437 (ut
.release
[2] == '4') && (ut
.release
[3] == '.') &&
438 (ut
.release
[4] == '1') && (ut
.release
[5] >= '0') &&
439 (ut
.release
[5] < '8')) &&
440 (fstat(data
->dev
, &st
) == 0) &&
441 (S_ISBLK(st
.st_mode
))) {
444 rlim
.rlim_cur
= rlim
.rlim_max
= (unsigned long) RLIM_INFINITY
;
445 setrlimit(RLIMIT_FSIZE
, &rlim
);
446 getrlimit(RLIMIT_FSIZE
, &rlim
);
447 if (((unsigned long) rlim
.rlim_cur
) <
448 ((unsigned long) rlim
.rlim_max
)) {
449 rlim
.rlim_cur
= rlim
.rlim_max
;
450 setrlimit(RLIMIT_FSIZE
, &rlim
);
460 ext2fs_free_mem(&data
);
462 ext2fs_free_mem(&io
);
466 static errcode_t
unix_close(io_channel channel
)
468 struct unix_private_data
*data
;
469 errcode_t retval
= 0;
471 EXT2_CHECK_MAGIC(channel
, EXT2_ET_MAGIC_IO_CHANNEL
);
472 data
= (struct unix_private_data
*) channel
->private_data
;
473 EXT2_CHECK_MAGIC(data
, EXT2_ET_MAGIC_UNIX_IO_CHANNEL
);
475 if (--channel
->refcount
> 0)
479 retval
= flush_cached_blocks(channel
, data
, 0);
482 if (close(data
->dev
) < 0)
486 ext2fs_free_mem(&channel
->private_data
);
487 ext2fs_free_mem(&channel
->name
);
488 ext2fs_free_mem(&channel
);
492 static errcode_t
unix_set_blksize(io_channel channel
, int blksize
)
494 struct unix_private_data
*data
;
497 EXT2_CHECK_MAGIC(channel
, EXT2_ET_MAGIC_IO_CHANNEL
);
498 data
= (struct unix_private_data
*) channel
->private_data
;
499 EXT2_CHECK_MAGIC(data
, EXT2_ET_MAGIC_UNIX_IO_CHANNEL
);
501 if (channel
->block_size
!= blksize
) {
503 if ((retval
= flush_cached_blocks(channel
, data
, 0)))
507 channel
->block_size
= blksize
;
509 if ((retval
= alloc_cache(channel
, data
)))
516 static errcode_t
unix_read_blk(io_channel channel
, unsigned long block
,
517 int count
, void *buf
)
519 struct unix_private_data
*data
;
520 struct unix_cache
*cache
, *reuse
[READ_DIRECT_SIZE
];
525 EXT2_CHECK_MAGIC(channel
, EXT2_ET_MAGIC_IO_CHANNEL
);
526 data
= (struct unix_private_data
*) channel
->private_data
;
527 EXT2_CHECK_MAGIC(data
, EXT2_ET_MAGIC_UNIX_IO_CHANNEL
);
530 return raw_read_blk(channel
, data
, block
, count
, buf
);
533 * If we're doing an odd-sized read or a very large read,
534 * flush out the cache and then do a direct read.
536 if (count
< 0 || count
> WRITE_DIRECT_SIZE
) {
537 if ((retval
= flush_cached_blocks(channel
, data
, 0)))
539 return raw_read_blk(channel
, data
, block
, count
, buf
);
544 /* If it's in the cache, use it! */
545 if ((cache
= find_cached_block(data
, block
, &reuse
[0]))) {
547 printf("Using cached block %d\n", block
);
549 memcpy(cp
, cache
->buf
, channel
->block_size
);
552 cp
+= channel
->block_size
;
556 * Find the number of uncached blocks so we can do a
557 * single read request
559 for (i
=1; i
< count
; i
++)
560 if (find_cached_block(data
, block
+i
, &reuse
[i
]))
563 printf("Reading %d blocks starting at %d\n", i
, block
);
565 if ((retval
= raw_read_blk(channel
, data
, block
, i
, cp
)))
568 /* Save the results in the cache */
569 for (j
=0; j
< i
; j
++) {
572 reuse_cache(channel
, data
, cache
, block
++);
573 memcpy(cache
->buf
, cp
, channel
->block_size
);
574 cp
+= channel
->block_size
;
578 #endif /* NO_IO_CACHE */
581 static errcode_t
unix_write_blk(io_channel channel
, unsigned long block
,
582 int count
, const void *buf
)
584 struct unix_private_data
*data
;
585 struct unix_cache
*cache
, *reuse
;
586 errcode_t retval
= 0;
590 EXT2_CHECK_MAGIC(channel
, EXT2_ET_MAGIC_IO_CHANNEL
);
591 data
= (struct unix_private_data
*) channel
->private_data
;
592 EXT2_CHECK_MAGIC(data
, EXT2_ET_MAGIC_UNIX_IO_CHANNEL
);
595 return raw_write_blk(channel
, data
, block
, count
, buf
);
598 * If we're doing an odd-sized write or a very large write,
599 * flush out the cache completely and then do a direct write.
601 if (count
< 0 || count
> WRITE_DIRECT_SIZE
) {
602 if ((retval
= flush_cached_blocks(channel
, data
, 1)))
604 return raw_write_blk(channel
, data
, block
, count
, buf
);
608 * For a moderate-sized multi-block write, first force a write
609 * if we're in write-through cache mode, and then fill the
610 * cache with the blocks.
612 writethrough
= channel
->flags
& CHANNEL_FLAGS_WRITETHROUGH
;
614 retval
= raw_write_blk(channel
, data
, block
, count
, buf
);
618 cache
= find_cached_block(data
, block
, &reuse
);
621 reuse_cache(channel
, data
, cache
, block
);
623 memcpy(cache
->buf
, cp
, channel
->block_size
);
624 cache
->dirty
= !writethrough
;
627 cp
+= channel
->block_size
;
630 #endif /* NO_IO_CACHE */
633 static errcode_t
unix_write_byte(io_channel channel
, unsigned long offset
,
634 int size
, const void *buf
)
636 struct unix_private_data
*data
;
637 errcode_t retval
= 0;
640 EXT2_CHECK_MAGIC(channel
, EXT2_ET_MAGIC_IO_CHANNEL
);
641 data
= (struct unix_private_data
*) channel
->private_data
;
642 EXT2_CHECK_MAGIC(data
, EXT2_ET_MAGIC_UNIX_IO_CHANNEL
);
646 * Flush out the cache completely
648 if ((retval
= flush_cached_blocks(channel
, data
, 1)))
652 if (lseek(data
->dev
, offset
+ data
->offset
, SEEK_SET
) < 0)
655 actual
= write(data
->dev
, buf
, size
);
657 return EXT2_ET_SHORT_WRITE
;
663 * Flush data buffers to disk.
665 static errcode_t
unix_flush(io_channel channel
)
667 struct unix_private_data
*data
;
668 errcode_t retval
= 0;
670 EXT2_CHECK_MAGIC(channel
, EXT2_ET_MAGIC_IO_CHANNEL
);
671 data
= (struct unix_private_data
*) channel
->private_data
;
672 EXT2_CHECK_MAGIC(data
, EXT2_ET_MAGIC_UNIX_IO_CHANNEL
);
675 retval
= flush_cached_blocks(channel
, data
, 0);
681 static errcode_t
unix_set_option(io_channel channel
, const char *option
,
684 struct unix_private_data
*data
;
688 EXT2_CHECK_MAGIC(channel
, EXT2_ET_MAGIC_IO_CHANNEL
);
689 data
= (struct unix_private_data
*) channel
->private_data
;
690 EXT2_CHECK_MAGIC(data
, EXT2_ET_MAGIC_UNIX_IO_CHANNEL
);
692 if (!strcmp(option
, "offset")) {
694 return EXT2_ET_INVALID_ARGUMENT
;
696 tmp
= strtoul(arg
, &end
, 0);
698 return EXT2_ET_INVALID_ARGUMENT
;
702 return EXT2_ET_INVALID_ARGUMENT
;