revert between 56095 -> 55830 in arch
[AROS.git] / rom / filesys / cdfs / bcache.c
blob96979835fc40bef740edfc0dab86939cdab6913b
1 /*
2 * Copyright (C) 2013, The AROS Development Team
3  *  All rights reserved.
4 * Author: Jason S. McMullan <jason.mcmullan@gmail.com>
6 * Licensed under the AROS PUBLIC LICENSE (APL) Version 1.1
7 */
9 /* 'BCache' is a trivial write-through cache for block devices.
11 * Supports TD_*, CD_*, TD64_*, and NSCMD_* devices
14 #include <aros/debug.h>
16 #include <proto/exec.h>
18 #include <devices/trackdisk.h>
19 #include <devices/newstyle.h>
21 #include "bcache.h"
/* From Wikipedia - XORShift RNG, by George Marsaglia
 * (xorshift128 variant: four 32-bit words of state)
 */
struct XorshiftRNG {
    ULONG w, x, y, z;   /* RNG state; seeded by XorshiftRNG_Init() */
};
29 static VOID XorshiftRNG_Init(struct XorshiftRNG *r)
31 r->w = 88675123;
32 r->x = 123456789;
33 r->y = 362436069;
34 r->z = 521288629;
37 static ULONG XorshiftRNG(struct XorshiftRNG *r)
39 ULONG t;
40 t = r->x ^ (r->x << 11);
41 r->x = r->y; r->y = r->z; r->z = r->w;
42 return r->w = r->w ^ (r->w >> 19) ^ (t ^ (t >> 8));
/* One cache slot: a node on either bp_CacheValid or bp_CacheInvalid. */
struct BCacheEntry {
    struct MinNode be_Node;     /* Linkage on the valid/invalid lists */
    UBYTE *be_Buffer;           /* Pointer into bp_CacheBlocks */
    ULONG be_Block;             /* Block address.
                                 * Sufficient for up to:
                                 *   2TB of 512 byte blocks
                                 *   8TB of 2048 byte blocks
                                 *  16TB of 4096 byte blocks
                                 */
};
/* Private device-facing state behind the public struct BCache.
 * NOTE: bp_Public MUST remain the first member - the code freely
 * casts struct BCache * <-> struct BCachePrivate *. */
struct BCachePrivate {
    struct BCache bp_Public;            /* Public part handed to callers */
    struct ExecBase *bp_SysBase;        /* Cached SysBase (see #define below) */
    struct IOStdReq *bp_IOStdReq;       /* Open request on the block device */
    ULONG bp_ChangeIdx;                 /* Last invalidate changenum */
    ULONG bp_ChangeNum;                 /* Last changenum */
    ULONG bp_BlockBase;                 /* First block (lowcyl * blockspertrack) */
    ULONG bp_Blocks;                    /* Number of addressable blocks */
    ULONG bp_ReadCMD;                   /* CMD_READ, TD_READ64 or NSCMD_TD_READ64 */
    ULONG bp_WriteCMD;                  /* Matching write command */
    struct XorshiftRNG bp_RNG;          /* RNG driving cache replacement */

    LONG bp_NumBuffers;                 /* Number of cache entries */
    ULONG bp_BufMemType;                /* MEMF_ flags for the buffer area */
    UBYTE *bp_CacheBlocks;              /* Single contiguous buffer area */
    struct BCacheEntry *bp_CacheEntries;/* Entry array (tail of bp_CacheBlocks) */
    struct List bp_CacheValid;          /* Active blocks */
    struct List bp_CacheInvalid;        /* Inactive entries */
};
77 /* Create a buffer cache, based off of a FileSysStartupMsg.
78 * Note that:
79 * bc_BlockSize will be fsm_Environ[DE_BLOCKSIZE]*4
80 * (default - 512)
81 * bc_Blocks is the number of blocks from DE_LOWCYL to DE_HIGHCYL
82 * (default - total # of blocks on the disk)
84 LONG BCache_Create(struct ExecBase *SysBase, struct FileSysStartupMsg *fssm, struct BCache **bcache)
86 struct BCachePrivate *bp;
87 struct DosEnvec *de = BADDR(fssm->fssm_Environ);
88 LONG err;
90 if ((bp = AllocVec(sizeof(*bp), MEMF_ANY | MEMF_PUBLIC))) {
91 struct MsgPort *mp;
92 if ((mp = CreateMsgPort())) {
93 struct IOStdReq *io;
94 if ((io = (struct IOStdReq*)CreateIORequest(mp, sizeof(*io)))) {
95 bp->bp_IOStdReq = io;
96 bp->bp_SysBase = SysBase;
97 XorshiftRNG_Init(&bp->bp_RNG);
99 D(bug("%s: Device %b.%d\n", __func__, fssm->fssm_Device, fssm->fssm_Unit));
101 if (0 == OpenDevice(AROS_BSTR_ADDR(fssm->fssm_Device), fssm->fssm_Unit, (struct IORequest *)io, 0)) {
102 struct DriveGeometry dg;
103 struct NSDeviceQueryResult nsq;
104 ULONG blocksize = 1;
105 ULONG blockspertrack = 0, lowcyl = 0, cylinders = ~0;
106 ULONG numbuffers = 0;
107 ULONG bufmemtype = MEMF_PUBLIC;
109 D(bug("%s: Opened.\n", __func__));
111 /* Get the device geometry */
112 io->io_Command = TD_GETGEOMETRY;
113 io->io_Data = &dg;
114 io->io_Offset = 0;
115 io->io_Length = sizeof(dg);
116 io->io_Actual = 0;
117 if (0 == DoIO((struct IORequest *)io)) {
118 blockspertrack = dg.dg_TrackSectors;
119 lowcyl = 0;
120 cylinders = dg.dg_Cylinders;
121 blocksize = dg.dg_SectorSize;
123 D(bug("%s: Geometry: bs=%d, blocks/track=%d, cyls=%d\n", __func__, blocksize, blockspertrack, cylinders));
126 if (de->de_TableSize >= DE_SIZEBLOCK)
127 blocksize = de->de_SizeBlock * 4;
129 /* Default to 32-bit read/write */
130 bp->bp_ReadCMD = CMD_READ;
131 bp->bp_WriteCMD = CMD_WRITE;
132 bp->bp_Blocks = (ULONG)(0x100000000ULL/blocksize);
134 /* Probe for TD_READ64 */
135 io->io_Command = TD_READ64;
136 io->io_Data = NULL;
137 io->io_Length = 0;
138 io->io_Offset = 0;
139 io->io_Actual = 0;
140 if (0 == DoIO((struct IORequest *)io)) {
141 bp->bp_ReadCMD = TD_READ64;
142 bp->bp_WriteCMD = TD_WRITE64;
143 bp->bp_Blocks = (ULONG)(~0ULL/blocksize);
144 D(bug("%s: TD64 works\n", __func__));
147 /* Determine if it's a NSD device */
148 io->io_Command = NSCMD_DEVICEQUERY;
149 io->io_Data = &nsq;
150 io->io_Length = sizeof(nsq);
151 io->io_Offset = 0;
152 io->io_Actual = 0;
153 if (0 == DoIO((struct IORequest *)io)) {
154 int i;
155 D(bug("%s: NSCMD_DEVICEQUERY works\n", __func__));
156 for (i = 0; nsq.SupportedCommands[i] != 0; i++) {
157 if (nsq.SupportedCommands[i] == NSCMD_TD_READ64) {
158 bp->bp_ReadCMD = NSCMD_TD_READ64;
159 bp->bp_WriteCMD = NSCMD_TD_WRITE64;
160 bp->bp_Blocks = (ULONG)(~0ULL/blocksize);
161 D(bug("%s: NSCMD_TD_READ64 works\n", __func__));
162 break;
167 if (de->de_TableSize >= DE_BLKSPERTRACK && de->de_BlocksPerTrack)
168 blockspertrack = de->de_BlocksPerTrack;
170 if (de->de_TableSize >= DE_LOWCYL && de->de_LowCyl)
171 lowcyl = de->de_LowCyl;
173 if (de->de_TableSize >= DE_HIGHCYL && de->de_HighCyl) {
174 if ((de->de_HighCyl+1-lowcyl) < cylinders)
175 cylinders = (de->de_HighCyl+1-lowcyl);
178 if (de->de_TableSize >= DE_NUMBUFFERS) {
179 numbuffers = de->de_NumBuffers;
181 if (numbuffers == 0) {
182 if (blockspertrack > 32)
183 numbuffers = 32;
184 else
185 numbuffers = blockspertrack;
188 if (de->de_TableSize >= DE_BUFMEMTYPE) {
189 bufmemtype = de->de_BufMemType;
192 if (((lowcyl + cylinders) * blockspertrack) < bp->bp_Blocks) {
193 bp->bp_Public.bc_BlockSize = blocksize;
194 bp->bp_Blocks = cylinders * blockspertrack;
195 bp->bp_BlockBase = lowcyl * blockspertrack;
196 io->io_Command = TD_CHANGENUM;
197 io->io_Data = NULL;
198 io->io_Offset = 0;
199 io->io_Length = 0;
200 io->io_Actual = 0;
201 if (0 == DoIO((struct IORequest *)io)) {
202 D(bug("%s: TD_CHANGENUM works\n", __func__));
203 bp->bp_ChangeNum = io->io_Actual;
204 } else {
205 bp->bp_ChangeNum = ~0;
207 bp->bp_ChangeIdx = bp->bp_ChangeNum;
209 bp->bp_NumBuffers = 0;
210 bp->bp_BufMemType = bufmemtype;
211 err = BCache_Extend(&bp->bp_Public, numbuffers);
212 if (err == RETURN_OK) {
213 BCache_Invalidate(&bp->bp_Public);
214 *bcache = &bp->bp_Public;
215 D(bug("%s: BCache of %dx%d blocks created for blocks %d-%d\n", __func__, bp->bp_Public.bc_BlockSize, bp->bp_NumBuffers, bp->bp_BlockBase, bp->bp_BlockBase + bp->bp_Blocks-1));
216 return RETURN_OK;
217 } else {
218 err = ERROR_NO_FREE_STORE;
220 } else {
221 err = ERROR_SEEK_ERROR;
223 CloseDevice((struct IORequest *)io);
224 } else {
225 err = ERROR_OBJECT_NOT_FOUND;
227 DeleteIORequest((struct IORequest *)io);
228 } else {
229 err = ERROR_NO_FREE_STORE;
231 DeleteMsgPort(mp);
232 } else {
233 err = ERROR_NO_FREE_STORE;
235 FreeVec(bp);
236 } else {
237 err = ERROR_NO_FREE_STORE;
239 *bcache = NULL;
240 return err;
243 #undef SysBase
244 #define SysBase bp->bp_SysBase
246 /* Dispose of the cache, and close the underlying device
248 * NOTE: This does *not* call Remove() on bc_Node - do that
249 * before calling this routine!
251 VOID BCache_Delete(struct BCache *bcache)
253 struct BCachePrivate *bp = (struct BCachePrivate *)bcache;
254 ULONG size = bp->bp_Public.bc_BlockSize * bp->bp_NumBuffers +
255 sizeof(bp->bp_CacheEntries[0]) * bp->bp_NumBuffers;
257 CloseDevice((struct IORequest *)&bp->bp_IOStdReq);
258 FreeMem(bp->bp_CacheBlocks, size);
259 FreeVec(bp);
262 /* Extend/Reduce the cache by 'numbuffers' blocks
264 LONG BCache_Extend(struct BCache *bcache, LONG numbuffers)
266 struct BCachePrivate *bp = (struct BCachePrivate *)bcache;
267 UBYTE *newcache;
268 ULONG newsize;
269 ULONG oldsize = bp->bp_Public.bc_BlockSize * bp->bp_NumBuffers +
270 sizeof(bp->bp_CacheEntries[0]) * bp->bp_NumBuffers;
272 /* Minimum size is 1 buffer */
273 if (bp->bp_NumBuffers + numbuffers <= 0)
274 numbuffers = 1 - bp->bp_NumBuffers;
276 if (numbuffers == 0) {
277 D(bug("%s: No change to buffer size\n", __func__));
278 return RETURN_OK;
281 numbuffers += bp->bp_NumBuffers;
282 newsize = bp->bp_Public.bc_BlockSize * numbuffers +
283 sizeof(bp->bp_CacheEntries[0]) * numbuffers;
285 newcache = AllocMem(newsize, bp->bp_BufMemType);
286 if (newcache) {
287 struct BCacheEntry *newentry = (struct BCacheEntry *)&newcache[bp->bp_Public.bc_BlockSize * numbuffers];
288 LONG i, index;
290 NEWLIST(&bp->bp_CacheValid);
291 NEWLIST(&bp->bp_CacheInvalid);
292 for (i = index = 0; i < numbuffers; i++, index += bp->bp_Public.bc_BlockSize) {
293 newentry[i].be_Buffer = &newcache[index];
294 AddTail(&bp->bp_CacheInvalid, (struct Node *)&newentry[i].be_Node);
297 if (bp->bp_NumBuffers)
298 FreeMem(bp->bp_CacheBlocks, oldsize);
300 bp->bp_NumBuffers = numbuffers;
301 bp->bp_CacheBlocks = newcache;
302 bp->bp_CacheEntries = newentry;
303 D(bug("%s: Cache size now %d entries\n", __func__, numbuffers));
304 return RETURN_OK;
307 return ERROR_NO_FREE_STORE;
310 /* returns:
311 * RETURN_OK: No change since last invalidate
312 * RETURN_WARN: Disk has changes, but a disk is present
313 * ERROR_NO_DISK: No disk present
315 LONG BCache_Present(struct BCache *bcache)
317 struct BCachePrivate *bp = (struct BCachePrivate *)bcache;
318 struct IOStdReq *io = bp->bp_IOStdReq;
320 /* Determine if medium has changed */
321 io->io_Command = TD_CHANGENUM;
322 io->io_Data = NULL;
323 io->io_Offset = 0;
324 io->io_Length = 0;
325 io->io_Actual = 0;
326 if (0 == DoIO((struct IORequest *)io))
327 bp->bp_ChangeNum = io->io_Actual;
329 if (bp->bp_ChangeNum != bp->bp_ChangeIdx) {
330 /* Determine if a new disk is present */
331 io->io_Command = TD_CHANGESTATE;
332 io->io_Data = NULL;
333 io->io_Offset = 0;
334 io->io_Length = 0;
335 io->io_Actual = 0;
336 /* Even if this fails, io_Actual should be good for us */
337 DoIO((struct IORequest *)io);
338 return (io->io_Actual ? ERROR_NO_DISK : RETURN_WARN);
341 return RETURN_OK;
345 /* Drop all cache
346 * returns:
347 * RETURN_OK - Cache flushed, disk present
348 * ERROR_NO_DISK - Cache flushed, no disk present
350 LONG BCache_Invalidate(struct BCache *bcache)
352 struct BCachePrivate *bp = (struct BCachePrivate *)bcache;
353 struct BCacheEntry *be;
354 int i;
356 while ((be = (struct BCacheEntry *)REMHEAD(&bp->bp_CacheValid)))
357 ADDHEAD(&bp->bp_CacheInvalid, be);
359 if (BCache_Present(bcache) == ERROR_NO_DISK)
360 return ERROR_NO_DISK;
362 /* We have a disk - mark it as 'present' for this cache */
363 bp->bp_ChangeNum = bp->bp_ChangeIdx;
365 /* Stir the RNG */
366 for (i = 0; i < (bp->bp_ChangeNum & 0xf); i++)
367 XorshiftRNG_Init(&bp->bp_RNG);
369 return RETURN_OK;
372 static LONG BCache_IO(struct BCache *bcache, ULONG cmd, ULONG block, ULONG blocks, APTR buffer)
374 struct BCachePrivate *bp = (struct BCachePrivate *)bcache;
375 struct IOStdReq *io = bp->bp_IOStdReq;
376 LONG err;
377 UQUAD offset, length;
379 D(bug("%s: cmd=%d, blocks %d (%d)\n", __func__, cmd, block, blocks));
381 err = BCache_Present(bcache);
382 if (err != RETURN_OK) {
383 D(bug("%s: Disk changed or not present\n", __func__));
384 return err;
387 /* Validate the offset + length */
388 if ((block + blocks) > bp->bp_Blocks) {
389 D(bug("%s: Block 0x%08x exceeds max 0x%08x\n", __func__, (ULONG)(block + blocks), (ULONG)bp->bp_Blocks));
390 return ERROR_SEEK_ERROR;
393 /* Read the data into the buffer */
394 offset = (UQUAD)block * bp->bp_Public.bc_BlockSize;
395 length = (UQUAD)blocks * bp->bp_Public.bc_BlockSize;
397 if (length >= (1ULL<<32)) {
398 D(bug("%s: Block range exceeds ULONG size\n", __func__));
399 return ERROR_OBJECT_TOO_LARGE;
402 io->io_Command = cmd;
403 io->io_Data = buffer;
404 io->io_Length = (ULONG)length;
405 io->io_Offset = (ULONG)offset;
406 io->io_Actual = (ULONG)(offset >> 32);
407 if (0 == DoIO((struct IORequest *)io)) {
408 /* Stir the RNG */
409 XorshiftRNG(&bp->bp_RNG);
410 D(bug("%s: io_Error %d\n", __func__, io->io_Error));
411 return io->io_Error ? RETURN_ERROR : RETURN_OK;
414 D(bug("%s: Invalid command?\n", __func__));
415 return RETURN_FAIL;
418 /* Read block from disk to the buffer
419 * returns:
420 * RETURN_OK - Disk present
421 * Data read from disk
422 * RETURN_WARN - Disk has changed since last BCache_Invalidate();
423 * No data read from disk
424 * RETURN_ERROR - Disk read error
425 * Partial data read from disk
426 * ERROR_NO_DISK - No disk present
427 * No data read from disk
429 LONG BCache_Read(struct BCache *bcache, ULONG block, UBYTE **bufferp)
431 struct BCachePrivate *bp = (struct BCachePrivate *)bcache;
432 struct BCacheEntry *be;
433 LONG err;
434 ULONG depth = 0;
435 ULONG index, run;
436 int i;
438 D(bug("%s: Block %d\n", __func__, block));
439 if (block > bp->bp_Blocks)
440 return ERROR_SEEK_ERROR;
442 /* Scan to see if this block was cached */
443 ForeachNode(&bp->bp_CacheValid, be) {
444 if (be->be_Block == block) {
445 /* Move to the front of the list, if it is
446 * more than 1/8th of the way into the
447 * cache.
449 D(bug("%s: Cached block found at depth %d\n", __func__, depth));
450 if (depth > (bp->bp_NumBuffers / 8)) {
451 D(bug("%s: Moving to head of Valid list..\n", __func__));
452 REMOVE(be);
453 ADDHEAD(&bp->bp_CacheValid, be);
455 *bufferp = be->be_Buffer;
456 return RETURN_OK;
458 depth++;
461 /* Ok, no cached block. Let's pick a random spot
462 * and random length out of the buffer cache.
464 if (bp->bp_NumBuffers == 1) {
465 index = 0;
466 run = 1;
467 } else {
468 if (bp->bp_NumBuffers < 16)
469 run = 1 + (XorshiftRNG(&bp->bp_RNG) % (bp->bp_NumBuffers - 1));
470 else
471 run = 8 + (XorshiftRNG(&bp->bp_RNG) & 7);
472 index = XorshiftRNG(&bp->bp_RNG) % (bp->bp_NumBuffers - run);
475 ASSERT(run > 0);
476 ASSERT(index + run <= bp->bp_NumBuffers);
477 ASSERT(block + run <= bp->bp_Blocks);
479 /* First, shorten the run to prevent duplications in the cache */
480 D(bug("%s: Caching blocks %d (%d)\n", __func__, block, run));
481 ForeachNode(&bp->bp_CacheValid, be) {
482 if (be->be_Block > block && be->be_Block < (block + run)) {
483 /* Shorten the run */
484 D(bug("%s: Shorten run: cached block %d in (%d - %d)\n",
485 __func__, be->be_Block, block, block + run - 1));
486 run = be->be_Block - block;
490 ASSERT(run > 0);
492 /* Steal the entries for these buffers */
493 D(bug("%s: Steal indexes %d (%d)\n", __func__, index, run));
494 for (i = 0, be = &bp->bp_CacheEntries[index]; i < run; i++, be++) {
495 REMOVE(&be->be_Node);
498 be = &bp->bp_CacheEntries[index];
500 /* Read the run */
501 err = BCache_IO(bcache, bp->bp_ReadCMD, block, run, be->be_Buffer);
502 if (err == RETURN_OK) {
503 D(bug("%s: Adding to Valid cache...\n", __func__));
504 for (i = 0; i < run; i++, be++) {
505 be->be_Block = block + i;
506 ADDHEAD(&bp->bp_CacheValid, &be->be_Node);
508 *bufferp = bp->bp_CacheEntries[index].be_Buffer;
509 } else {
510 D(bug("%s: IO failed, returning %d (%d) to Invalid list\n", __func__, index, run));
511 for (i = 0; i < run; i++, be++) {
512 ADDHEAD(&bp->bp_CacheInvalid, &be->be_Node);
516 D(bug("Valid:"));
517 ForeachNode(&bp->bp_CacheValid, be) {
518 D(bug(" %d (%d)", be - bp->bp_CacheEntries, be->be_Block));
520 D(bug("\nInval"));
521 ForeachNode(&bp->bp_CacheInvalid, be) {
522 D(bug(" %d", be - bp->bp_CacheEntries));
524 D(bug("\n"));
526 return err;
529 /* Write buffer to blocks on the disk.
530 * returns:
531 * RETURN_OK - Disk present
532 * Data written to disk
533 * RETURN_WARN - Disk has changed since last BCache_Invalidate();
534 * No data written to disk
535 * RETURN_ERROR - Disk write error
536 * Partial data written to disk
537 * ERROR_NO_DISK - No disk present
538 * No data written to disk
540 LONG BCache_Write(struct BCache *bcache, ULONG block, CONST UBYTE *buffer)
542 struct BCachePrivate *bp = (struct BCachePrivate *)bcache;
543 struct BCacheEntry *be;
544 ULONG depth = 0;
546 if (block > bp->bp_Blocks)
547 return ERROR_SEEK_ERROR;
549 /* Scan to see if this block was cached */
550 ForeachNode(&bp->bp_CacheValid, be) {
551 if (be->be_Block == block) {
552 /* Move to the front of the list, if it is
553 * more than 1/8th of the way into the
554 * cache.
556 if (depth > (bp->bp_NumBuffers / 8)) {
557 REMOVE(be);
558 ADDHEAD(&bp->bp_CacheValid, be);
560 /* Synchronize the cache with the data to write */
561 CopyMem(buffer, be->be_Buffer, bp->bp_Public.bc_BlockSize);
562 break;
564 depth++;
567 return BCache_IO(bcache, bp->bp_WriteCMD, block, 1, (APTR)buffer);