1 /******************************************************************************
3 VERSION $Id: buf.c,v 1.26 92/01/09 09:15:26 margo Exp $
4 PACKAGE: User Level Shared Memory Manager
7 This package provides a buffer pool interface implemented as
8 a collection of file pages mapped into shared memory.
10 Based on Mark's buffer manager
30 ******************************************************************************/
31 #include <sys/types.h>
45 we need to translate between some type of file id that the user
46 process passes and a file descriptor. For now, it's a nop.
48 #define GET_MASTER get_sem ( buf_spinlock ) /* acquire the buffer-pool master semaphore */
49 #define RELEASE_MASTER release_sem ( buf_spinlock ) /* release it; call sites test the result */
51 #define LRUID *buf_lru /* index into bufhdr_table stored in *buf_lru */
52 #define LRUP (bufhdr_table+*buf_lru) /* pointer to the header that LRUID names */
53 #define MRU bufhdr_table[*buf_lru].lru.prev /* lru.prev of that header; NOTE(review): presumably the most-recently-used end of a circular chain -- confirm */
55 /* Global indicator that you have started reusing buffers */
56 int do_statistics = 0;
58 Process Statics (pointers into shared memory)
60 static BUF_T *buf_table = 0; /* start of the attached region: the page buffers */
61 static BUFHDR_T *bufhdr_table; /* NUM_BUFS headers, laid out right after the buffers */
62 static int *buf_hash_table; /* NUMTABLE_ENTRIES bucket heads (buffer indices; NUM_BUFS marks an empty bucket) */
63 static int *buf_lru; /* LRU is the free list */
64 static int buf_spinlock; /* id returned by create_sem(); used by GET_MASTER/RELEASE_MASTER */
65 static FINFO_T *buf_fids; /* NUM_FILE_ENTRIES file-info slots (offset, npages, refcount) */
66 static int *buf_sp; /* Pointer to string free space */
67 static char *buf_strings; /* file-name storage; buf_fids[i].offset indexes into it */
69 /* Process Local FID->FD table */
70 static int fds[NUM_FILE_ENTRIES];
73 static BUFHDR_T *bf_assign_buf();
74 static int bf_fid_to_fd();
75 static BUFHDR_T *bf_newbuf();
76 static int bf_put_page();
92 Initialize Process local structures
94 for ( i = 0; i < NUM_FILE_ENTRIES; i++ ) {
98 buf_region = attach_region ( BUF_REGION_NAME, BUF_REGION_NUM,
99 BUF_REGION_SIZE, &ref_count );
103 error_log3 ( "Buf Region: ADDR: %d ID: %d SIZE: %d\n", buf_region,
104 BUF_REGION_NUM, BUF_REGION_SIZE );
106 buf_table = (BUF_T *)buf_region;
107 bufhdr_table = (BUFHDR_T *)(buf_table + NUM_BUFS);
108 buf_hash_table = (int *)(bufhdr_table + NUM_BUFS);
109 buf_lru = buf_hash_table + NUMTABLE_ENTRIES;
110 spinlockp = buf_lru + 1;
111 buf_fids = (FINFO_T *)(spinlockp+1);
112 buf_sp = (int *)(buf_fids + NUM_FILE_ENTRIES);
113 buf_strings = (char *)(buf_sp + 1);
115 /* Create locking spinlock (it is created holding the lock) */
116 buf_spinlock = create_sem ( BUF_SPIN_NAME, BUF_SPIN_NUM, ref_count <= 1 );
117 if ( buf_spinlock < 0 ) {
120 if ( ref_count <= 1 ) {
121 *spinlockp = buf_spinlock;
123 /* Now initialize the buffer manager */
128 /* 2. Buffer headers */
129 for ( i = 0, bhp = bufhdr_table; i < NUM_BUFS; bhp++, i++ ) {
132 bhp->flags = 0; /* All Flags off */
134 bhp->wait_proc = -1; /* No sleepers */
135 LISTPE_INIT ( hash, bhp, i ); /* Hash chains */
137 bufhdr_table[0].lru.prev = NUM_BUFS-1;
138 bufhdr_table[NUM_BUFS-1].lru.next = 0;
141 for ( i = 0; i < NUMTABLE_ENTRIES; i++ ) {
142 buf_hash_table[i] = NUM_BUFS;
145 /* 4. File ID Table */
146 for ( i = 0; i < NUM_FILE_ENTRIES; i++ ) {
147 buf_fids[i].offset = -1;
148 buf_fids[i].npages = -1;
149 buf_fids[i].refcount = 0;
152 /* 5. Free String Pointer */
153 *buf_sp = (FILE_NAME_LEN*NUM_FILE_ENTRIES);
154 if (RELEASE_MASTER) {
157 error_log0 ( "Initialized buffer region\n" );
168 /* Flush Buffer Pool on Exit */
169 for ( i = 0; i < NUM_FILE_ENTRIES; i++ ) {
170 if ( fds[i] != -1 ) {
175 detach_region ( buf_table, BUF_REGION_NUM, BUF_REGION_SIZE, &ref );
181 We need an empty buffer. Find the LRU unpinned NON-Dirty page.
194 bhp->flags & (BUF_PINNED|BUF_IO_IN_PROGRESS);
195 bhp = LISTP_NEXTP (bufhdr_table, lru, bhp ) ) {
197 if ( bhp->lru.next == lruid ) {
199 error_log1 ( "All buffers are pinned. %s\n",
200 "Unable to grant buffer request" );
204 /* BHP can be used */
205 if ( bhp->flags & BUF_DIRTY ) {
208 MIS Check for log flushed appropriately
210 fd = bf_fid_to_fd(bhp->id.file_id);
212 error_log1 ("Invalid fid %d\n", bhp->id.file_id);
215 if ( bf_put_page(fd, bhp) < 0 ) {
219 /* Update Hash Pointers */
220 ndx = BUF_HASH ( bhp->id.file_id, bhp->id.obj_id );
221 LISTP_REMOVE(bufhdr_table, hash, bhp);
222 if ( buf_hash_table[ndx] == (bhp-bufhdr_table) ) {
223 if ( bhp->hash.next != (bhp-bufhdr_table) ) {
224 buf_hash_table[ndx] = bhp->hash.next;
226 buf_hash_table[ndx] = NUM_BUFS;
236 Add a page to a file and return a buffer for it.
240 buf_alloc ( fid, new_pageno )
253 if ( buf_fids[fid].npages == -1 ) {
254 /* initialize npages field */
255 fd = bf_fid_to_fd ( fid );
257 assert (fid < NUM_FILE_ENTRIES);
259 *new_pageno = buf_fids[fid].npages;
260 if ( *new_pageno == -1 ) {
264 buf_fids[fid].npages++;
265 ndx = BUF_HASH ( fid, *new_pageno );
267 fobj.obj_id = *new_pageno;
268 bhp = bf_assign_buf ( ndx, &fobj, BF_PIN|BF_DIRTY|BF_EMPTY, &len );
269 if ( RELEASE_MASTER ) {
274 return ((ADDR_T)(buf_table+(bhp-bufhdr_table)));
283 BF_DIRTY Mark page as dirty
284 BF_EMPTY Don't initialize page, just get buffer
285 BF_PIN Retrieve with pin
288 Might want to add a flag that sets an LSN for this buffer if the
291 Eventually, you may want a flag that indicates the I/O and lock
292 request should be shipped off together, but not for now.
295 buf_get ( file_id, page_id, flags, len )
299 int *len; /* Number of bytes read into buffer */
309 ndx = BUF_HASH ( file_id, page_id );
310 fobj.file_id = (long) file_id;
311 fobj.obj_id = (long) page_id;
316 This could be a for loop, but we lose speed
317 by making all the cases general purpose so we
318 optimize for the no-collision case.
320 bufid = buf_hash_table[ndx];
321 if ( bufid < NUM_BUFS ) {
322 for ( bhp = bufhdr_table+bufid;
323 !OBJ_EQ (bhp->id, fobj) || !(bhp->flags & BUF_VALID);
324 bhp = LISTP_NEXTP ( bufhdr_table, hash, bhp ) ) {
326 if ( bhp->hash.next == bufid ) {
331 if ( flags & BF_PIN ) {
332 bhp->flags |= BUF_PINNED;
335 fprintf(stderr, "buf_get: %X PINNED (%d)\n",
336 buf_table + (bhp-bufhdr_table), bhp->refcount);
339 if ( flags & BF_DIRTY ) {
340 bhp->flags |= BUF_DIRTY;
343 while ( bhp->flags & BUF_IO_IN_PROGRESS ) {
344 /* MIS -- eventually err check here */
346 printf("About to sleep on %d (me: %d)\n", bhp->wait_proc,
347 my_txnp - txn_table);
352 stat = proc_sleep_on ( &(bhp->wait_proc), buf_spinlock );
357 if (!( bhp->flags & BUF_IO_IN_PROGRESS) &&
358 (!OBJ_EQ (bhp->id, fobj) || !(bhp->flags & BUF_VALID))) {
361 return(buf_get ( file_id, page_id, flags, len ));
368 /* If you get here, the page isn't in the hash table */
369 bhp = bf_assign_buf ( ndx, &fobj, flags, len );
371 /* Common code between found and not found */
373 if ( bhp && bhp->flags & BUF_NEWPAGE ) {
381 return ((ADDR_T)(buf_table+(bhp-bufhdr_table)));
388 MIS - do I want to add file links to buffer pool?
391 buf_sync ( fid, close )
393 int close; /* should we dec refcount and possibly
394 invalidate all the buffers */
401 if ( (fd = bf_fid_to_fd ( fid )) < 0 ) {
407 invalidate = (buf_fids[fid].refcount == 1 && close);
409 for ( bhp = bufhdr_table, i = 0; i < NUM_BUFS; bhp++, i++ ) {
410 if (bhp->id.file_id == fid) {
411 if ((bhp->flags & BUF_DIRTY) && (bf_put_page( fd, bhp ) < 0)) { /* header state uses BUF_* bits; BF_* are request flags */
414 bhp->id.file_id = -1;
417 if (invalidate || close)
418 buf_fids[fid].refcount--;
420 if (RELEASE_MASTER) {
429 buf_flags ( addr, set_flags, unset_flags )
438 fprintf(stderr, "buf_flags: %X setting %s%s%s%s%s releasing %s%s%s%s%s\n",
440 set_flags&BUF_DIRTY ? "DIRTY " : "",
441 set_flags&BUF_VALID ? "VALID " : "",
442 set_flags&BUF_PINNED ? "PINNED " : "",
443 set_flags&BUF_IO_ERROR ? "IO_ERROR " : "",
444 set_flags&BUF_IO_IN_PROGRESS ? "IO_IN_PROG " : "",
445 set_flags&BUF_NEWPAGE ? "NEWPAGE " : "",
446 unset_flags&BUF_DIRTY ? "DIRTY " : "",
447 unset_flags&BUF_VALID ? "VALID " : "",
448 unset_flags&BUF_PINNED ? "PINNED " : "",
449 unset_flags&BUF_IO_ERROR ? "IO_ERROR " : "",
450 unset_flags&BUF_IO_IN_PROGRESS ? "IO_IN_PROG " : "",
451 unset_flags&BUF_NEWPAGE ? "NEWPAGE " : "" );
453 if (!ADDR_OK(addr)) {
454 error_log1 ( "buf_flags: Invalid Buffer Address %x\n", addr ); /* report the routine that rejected the address */
457 bufid = ((BUF_T *)addr) - buf_table;
458 assert ( bufid < NUM_BUFS);
459 bhp = &bufhdr_table[bufid];
463 bhp->flags |= set_flags;
464 if ( set_flags & BUF_PINNED ) {
467 if ( set_flags & BUF_DIRTY ) {
468 unset_flags |= BUF_NEWPAGE;
471 if ( unset_flags & BUF_PINNED ) {
473 if ( bhp->refcount ) {
474 /* Turn off pin bit so it doesn't get unset */
475 unset_flags &= ~BUF_PINNED;
478 bhp->flags &= ~unset_flags;
480 if (RELEASE_MASTER) {
487 Take a string name and produce an fid.
491 MIS -- this is a potential problem -- you keep actual names
492 here -- what if people run from different directories?
495 buf_name_lookup ( fname )
506 for ( i = 0; i < NUM_FILE_ENTRIES; i++ ) {
507 if ( buf_fids[i].offset == -1 ) {
510 if (!strcmp (fname, buf_strings+buf_fids[i].offset)) {
511 if (RELEASE_MASTER) {
514 buf_fids[i].refcount++;
520 error_log0 ( "No more file ID's\n" );
522 ndx = *buf_sp - strlen(fname) - 1;
524 error_log0 ( "Out of string space\n" );
528 strcpy ( buf_strings+ndx, fname );
529 buf_fids[fid].offset = ndx;
531 buf_fids[fid].refcount = 1;
533 if (RELEASE_MASTER) {
545 assert ( (fid < NUM_FILE_ENTRIES) && (buf_fids[fid].offset != -1) );
546 if ( fds[fid] != -1 ) {
550 fds[fid] = open ( buf_strings+buf_fids[fid].offset, O_RDWR|O_CREAT,
552 if ( fds[fid] < 0 ) {
553 error_log3 ( "Error Opening File %s FID: %d FD: %d. Errno = %d\n",
554 buf_strings+buf_fids[fid].offset, fid, fds[fid],
558 error_log3 ( "Opening File %s FID: %d FD: %d\n",
559 buf_strings+buf_fids[fid].offset, fid, fds[fid] );
560 if ( buf_fids[fid].npages == -1 ) {
561 /* Initialize the npages field */
562 if ( fstat ( fds[fid], &sbuf ) ) {
563 error_log3 ( "Error Fstating %s FID: %d. Errno = %d\n",
564 buf_strings+buf_fids[fid].offset, fid, errno );
566 buf_fids[fid].npages = ( sbuf.st_size / BUFSIZE );
574 bf_put_page ( fd, bhp )
580 assert ( (bhp-bufhdr_table) < NUM_BUFS );
581 if ( lseek ( fd, bhp->id.obj_id << BUFSHIFT, L_SET ) < 0 ) {
584 bhp->flags |= BUF_IO_IN_PROGRESS;
585 if (RELEASE_MASTER) {
588 nbytes = write(fd, buf_table[bhp-bufhdr_table], BUFSIZE);
593 error_log1 ("Write failed with error code %d\n", errno);
595 } else if ( nbytes != BUFSIZE ) {
596 error_log1 ("Short write: %d bytes of %d\n", nbytes, BUFSIZE );
598 bhp->flags &= ~(BUF_DIRTY|BUF_IO_IN_PROGRESS);
603 bf_assign_buf ( ndx, obj, flags, len )
607 int *len; /* Number of bytes read */
612 assert ( obj->file_id < NUM_FILE_ENTRIES );
617 OBJ_ASSIGN ( (*obj), bhp->id );
618 if ( buf_hash_table[ndx] >= NUM_BUFS ) {
619 buf_hash_table[ndx] = bhp-bufhdr_table;
621 LISTPE_INSERT ( bufhdr_table, hash, bhp, buf_hash_table[ndx] );
624 bhp->flags |= BUF_VALID;
625 if ( flags & BF_PIN ) {
626 bhp->flags |= BUF_PINNED;
629 fprintf(stderr, "bf_assign_buf: %X PINNED (%d)\n",
630 buf_table + (bhp-bufhdr_table), bhp->refcount);
633 fd = bf_fid_to_fd(obj->file_id);
635 error_log1 ("Invalid fid %d\n", obj->file_id);
636 bhp->flags |= ~BUF_IO_ERROR;
639 if ( obj->obj_id >= buf_fids[obj->file_id].npages) {
640 buf_fids[obj->file_id].npages = obj->obj_id+1;
642 } else if ( flags & BF_EMPTY ) {
645 bhp->flags |= BUF_IO_IN_PROGRESS;
646 if (RELEASE_MASTER) {
649 if ( lseek ( fd, obj->obj_id << BUFSHIFT, L_SET ) < -1 ) {
650 error_log2 ("Unable to perform seek on file: %d to page %d",
651 obj->file_id, obj->obj_id );
652 bhp->flags &= ~BUF_IO_IN_PROGRESS;
653 bhp->flags |= ~BUF_IO_ERROR;
656 *len = read(fd, buf_table[bhp-bufhdr_table], BUFSIZE);
658 error_log2 ("Unable to perform read on file: %d to page %d",
659 obj->file_id, obj->obj_id );
660 bhp->flags &= ~BUF_IO_IN_PROGRESS;
661 bhp->flags |= ~BUF_IO_ERROR;
667 bhp->flags &= ~BUF_IO_IN_PROGRESS;
668 if ( bhp->wait_proc != -1 ) {
669 /* wake up waiter and anyone waiting on it */
671 printf("Waking transaction %d due to completed I/O\n",
674 proc_wake_id ( bhp->wait_proc );
680 if ( flags & BF_DIRTY ) {
681 bhp->flags |= BUF_DIRTY;
682 } else if ( *len < BUFSIZE ) {
683 bhp->flags |= BUF_NEWPAGE;
697 assert ( fid < NUM_FILE_ENTRIES );
698 if ( buf_fids[fid].npages == -1 ) {
699 /* initialize npages field */
700 (void) bf_fid_to_fd ( fid );
702 val = buf_fids[fid].npages;
704 val--; /* Convert to page number */
706 if (RELEASE_MASTER) {
721 printf ( "LRU + %d\n", *buf_lru );
723 printf("ID\tFID\tPID\tLNEXT\tLPREV\tHNEXT\tHPREV\tSLEEP\tFLAG\tREFS\n");
724 for ( bhp = bufhdr_table, i = 0; i < NUM_BUFS; bhp++, i++ ) {
725 printf ( "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%x\t%d\n", i,
726 bhp->id.file_id, bhp->id.obj_id,
727 bhp->lru.next, bhp->lru.prev,
728 bhp->hash.next, bhp->hash.prev,
729 bhp->wait_proc, bhp->flags, bhp->refcount );
732 if ( id >= NUM_BUFS ) {
733 printf ( "Buffer ID (%d) too high\n", id );
736 bhp = bufhdr_table+id;
737 printf ( "%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%x\t%d\n", id, /* ID column is the requested buffer, not the loop index */
738 bhp->id.file_id, bhp->id.obj_id,
739 bhp->lru.next, bhp->lru.prev,
740 bhp->hash.next, bhp->hash.prev,
741 bhp->wait_proc, bhp->flags, bhp->refcount );