2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997
5 * Sleepycat Software. All rights reserved.
10 static const char sccsid
[] = "@(#)log_put.c 10.20 (Sleepycat) 11/2/97";
13 #ifndef NO_SYSTEM_INCLUDES
14 #include <sys/types.h>
29 #include "common_ext.h"
31 static int __log_fill
__P((DB_LOG
*, void *, u_int32_t
));
32 static int __log_flush
__P((DB_LOG
*, const DB_LSN
*));
33 static int __log_newfd
__P((DB_LOG
*));
34 static int __log_putr
__P((DB_LOG
*, const DBT
*, u_int32_t
));
35 static int __log_write
__P((DB_LOG
*, void *, u_int32_t
));
/*
 * log_put --
 *	Public entry point for writing a log record.
 *
 *	What the visible fragments do:
 *	  - define OKFLAGS as DB_CHECKPOINT | DB_FLUSH and pass flags through
 *	    __db_fchk with that mask;
 *	  - on one path return the result of __db_ferr(dbenv, "log_put", 1);
 *	  - hand dblp, lsn, dbt and flags unchanged to __log_put, storing the
 *	    result in ret, then release the region with UNLOCK_LOGREGION.
 *
 * NOTE(review): this listing is an incomplete extraction. The parameter
 * declarations, the condition guarding the __db_ferr return, the matching
 * LOCK_LOGREGION call and the final return are not visible here; confirm
 * them against the complete file before relying on this description.
 */
42 log_put(dblp
, lsn
, dbt
, flags
)
50 /* Validate arguments. */
51 #define OKFLAGS (DB_CHECKPOINT | DB_FLUSH)
54 __db_fchk(dblp
->dbenv
, "log_put", flags
, OKFLAGS
)) != 0)
62 return (__db_ferr(dblp
->dbenv
, "log_put", 1));
67 ret
= __log_put(dblp
, lsn
, dbt
, flags
);
68 UNLOCK_LOGREGION(dblp
);
/*
 * __log_put --
 *	Internal record writer; the PUBLIC line below gives the prototype
 *	int __log_put(DB_LOG *, DB_LSN *, const DBT *, int).
 *
 *	Sequence shown by the visible fragments:
 *	  1. If lsn.offset + sizeof(HDR) + dbt->size exceeds persist.lg_max,
 *	     switch files. A record for which sizeof(LOGP) + dbt->size also
 *	     exceeds lg_max is reported with the message
 *	     log_put: record larger than maximum file size.
 *	     The whole log is flushed with __log_flush(dblp, NULL), the old
 *	     lsn.offset is saved in lastoff, and uw_lsn is reset to lsn.
 *	  2. When lsn.offset is 0, the persistent header (lp->persist, size
 *	     sizeof(LOGP)) is written first through __log_putr, with a
 *	     previous-offset argument of 0 or lastoff - lp->len.
 *	  3. The caller's lsn is filled in from lp->lsn, then the user record
 *	     is written through __log_putr with lsn.offset - lp->len.
 *	  4. When flags == DB_CHECKPOINT, every entry on the dblp->lp->fq
 *	     queue is logged with __log_register_log (name taken from
 *	     name_off, file id from fileid_off with size DB_FILE_ID_LEN).
 *	  5. When flags is DB_FLUSH or DB_CHECKPOINT the log is flushed.
 *	  6. When flags == DB_CHECKPOINT, lp->chkpt is set with time() and
 *	     st_wc_bytes and st_wc_mbytes are zeroed.
 *	  7. A final test detects a record that straddles w_off (started
 *	     before it, ends after it); the statement it guards is not
 *	     visible in this listing.
 *
 * NOTE(review): flags is compared with == rather than tested bitwise, so a
 * value with both DB_CHECKPOINT and DB_FLUSH set matches neither branch.
 * Presumably log_put rejects that combination before calling here (its
 * __db_ferr return); confirm.
 *
 * NOTE(review): lastoff - lp->len and lsn.offset - lp->len assume lp->len
 * still holds the length of the most recently written record; confirm that
 * nothing between the writes changes it.
 *
 * NOTE(review): this listing is an incomplete extraction; declarations,
 * error returns after each failed call, and the statements under the
 * Point the current LSN and Reset the file write offset comments are not
 * visible here.
 */
74 * Write a log record; internal version.
76 * PUBLIC: int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
79 __log_put(dblp
, lsn
, dbt
, flags
)
95 /* If this information won't fit in the file, swap files. */
96 if (lp
->lsn
.offset
+ sizeof(HDR
) + dbt
->size
> lp
->persist
.lg_max
) {
98 sizeof(LOGP
) + dbt
->size
> lp
->persist
.lg_max
) {
100 "log_put: record larger than maximum file size");
105 if ((ret
= __log_flush(dblp
, NULL
)) != 0)
109 * Save the last known offset from the previous file, we'll
110 * need it to initialize the persistent header information.
112 lastoff
= lp
->lsn
.offset
;
114 /* Point the current LSN to the new file. */
118 /* Reset the file write offset. */
121 /* Reset the first-unwritten LSN for the buffer. */
122 lp
->uw_lsn
= lp
->lsn
;
127 * Insert persistent information as the first record in every file.
128 * Note that the previous length is wrong for the very first record
129 * of the log, but that's okay, we check for it during retrieval.
131 if (lp
->lsn
.offset
== 0) {
132 t
.data
= &lp
->persist
;
133 t
.size
= sizeof(LOGP
);
134 if ((ret
= __log_putr(dblp
,
135 &t
, lastoff
== 0 ? 0 : lastoff
- lp
->len
)) != 0)
139 /* Initialize the LSN information returned to the user. */
140 lsn
->file
= lp
->lsn
.file
;
141 lsn
->offset
= lp
->lsn
.offset
;
143 /* Put out the user's record. */
144 if ((ret
= __log_putr(dblp
, dbt
, lp
->lsn
.offset
- lp
->len
)) != 0)
148 * On a checkpoint, we:
149 * Put out the checkpoint record (above).
150 * Save the LSN of the checkpoint in the shared region.
151 * Append the set of file name information into the log.
153 if (flags
== DB_CHECKPOINT
) {
156 for (fnp
= SH_TAILQ_FIRST(&dblp
->lp
->fq
, __fname
);
157 fnp
!= NULL
; fnp
= SH_TAILQ_NEXT(fnp
, q
, __fname
)) {
158 t
.data
= R_ADDR(dblp
, fnp
->name_off
);
159 t
.size
= strlen(t
.data
) + 1;
160 memset(&fid_dbt
, 0, sizeof(fid_dbt
));
161 fid_dbt
.data
= R_ADDR(dblp
, fnp
->fileid_off
);
162 fid_dbt
.size
= DB_FILE_ID_LEN
;
163 if ((ret
= __log_register_log(dblp
, NULL
, &r_unused
,
164 0, &t
, &fid_dbt
, fnp
->id
, fnp
->s_type
)) != 0)
170 * On a checkpoint or when flush is requested, we:
171 * Flush the current buffer contents to disk.
172 * Sync the log to disk.
174 if (flags
== DB_FLUSH
|| flags
== DB_CHECKPOINT
)
175 if ((ret
= __log_flush(dblp
, NULL
)) != 0)
179 * On a checkpoint, we:
180 * Save the time the checkpoint was written.
181 * Reset the bytes written since the last checkpoint.
183 if (flags
== DB_CHECKPOINT
) {
184 (void)time(&lp
->chkpt
);
185 lp
->stat
.st_wc_bytes
= lp
->stat
.st_wc_mbytes
= 0;
189 * When an application calls the log_flush routine, we need to figure
190 * out if the current buffer needs to be flushed. The problem is that
191 * if a record spans buffers, it's possible for the record continued
192 * in the current buffer to have begun in a previous buffer. Each time
193 * we write a buffer, we update the first-unwritten LSN to point to the
194 * first LSN after that written buffer. If we have a spanning record,
195 * correct that value to be the LSN that started it all, here.
197 if (lsn
->offset
< lp
->w_off
&& lsn
->offset
+ lp
->len
> lp
->w_off
)
/*
 * __log_putr --
 *	Emit one record (header followed by payload) into the log buffer.
 *	The static prototype earlier in the file declares it as
 *	int __log_putr(DB_LOG *, const DBT *, u_int32_t).
 *
 *	Visible steps:
 *	  - hdr.len is set to sizeof(HDR) + dbt->size and hdr.cksum to
 *	    __ham_func4 over the payload bytes;
 *	  - the header is copied in with __log_fill and lp->lsn.offset is
 *	    advanced by sizeof(HDR);
 *	  - the payload is copied in with __log_fill and lp->lsn.offset is
 *	    advanced by dbt->size;
 *	  - lp->len records the total length of the record just written.
 *
 *	lp->lsn.offset is advanced between the two fills, so after a failed
 *	payload fill the offset has already moved past the header.
 *
 * NOTE(review): the assignment that stores prev into the header and the
 * error returns after each __log_fill are not visible in this incomplete
 * listing; confirm against the complete file.
 */
205 * Actually put a record into the log.
208 __log_putr(dblp
, dbt
, prev
)
220 * Initialize the header. If we just switched files, lsn.offset will
221 * be 0, and what we really want is the offset of the previous record
222 * in the previous file. Fortunately, prev holds the value we want.
225 hdr
.len
= sizeof(HDR
) + dbt
->size
;
226 hdr
.cksum
= __ham_func4(dbt
->data
, dbt
->size
);
228 if ((ret
= __log_fill(dblp
, &hdr
, sizeof(HDR
))) != 0)
230 lp
->lsn
.offset
+= sizeof(HDR
);
232 if ((ret
= __log_fill(dblp
, dbt
->data
, dbt
->size
)) != 0)
234 lp
->lsn
.offset
+= dbt
->size
;
236 lp
->len
= sizeof(HDR
) + dbt
->size
;
/*
 * Locked wrapper around __log_flush: takes the log region lock with
 * LOCK_LOGREGION, calls __log_flush(dblp, lsn) storing the result in ret,
 * and releases the lock with UNLOCK_LOGREGION.
 *
 * NOTE(review): the function header is not visible in this incomplete
 * listing. Presumably this is the public log_flush routine (the error
 * text in __log_flush uses that name); confirm against the complete file.
 */
242 * Write all records less than or equal to the specified LSN.
251 LOCK_LOGREGION(dblp
);
252 ret
= __log_flush(dblp
, lsn
);
253 UNLOCK_LOGREGION(dblp
);
/*
 * __log_flush --
 *	Internal flush; the static prototype earlier in the file declares it
 *	as int __log_flush(DB_LOG *, const DB_LSN *).
 *
 *	Visible steps:
 *	  1. With no LSN given, a local t_lsn is built from lp->lsn.file and
 *	     lp->lsn.offset - lp->len (the start of the last record).
 *	     Otherwise an LSN beyond that point is reported with
 *	     log_flush: LSN past current end-of-log.
 *	  2. An LSN strictly below s_lsn (file first, then offset) needs no
 *	     work.
 *	  3. If the buffer is non-empty (b_off != 0) and the LSN passes the
 *	     uw_lsn test, the buffer is written with __log_write.
 *	  4. If dblp->lfname differs from the current log file number, a
 *	     descriptor is obtained with __log_newfd.
 *	  5. The descriptor is synced with __db_fsync, st_scount is
 *	     incremented, and s_lsn is set to uw_lsn.
 *
 * NOTE(review): the uw_lsn test in step 3 requires BOTH file >= and
 * offset >= independently. That is not the lexicographic comparison used
 * in steps 1 and 2: an LSN in a later file with a smaller offset would
 * fail it. It appears to rely on uw_lsn.file never being behind the file
 * of the flush LSN; confirm, or compare file first and offset only when
 * the files are equal.
 *
 * NOTE(review): step 4 checks only lfname, whereas __log_write also checks
 * lfd == -1 before calling __log_newfd. Confirm lfd cannot still be -1
 * when lfname already equals the current file number, otherwise
 * __db_fsync is handed an invalid descriptor.
 *
 * NOTE(review): the error returns after each failed call are not visible
 * in this incomplete listing.
 */
259 * Write all records less than or equal to the specified LSN; internal
263 __log_flush(dblp
, lsn
)
275 * If no LSN specified, flush the entire log by setting the flush LSN
276 * to the last LSN written in the log. Otherwise, check that the LSN
277 * isn't a non-existent record for the log.
280 t_lsn
.file
= lp
->lsn
.file
;
281 t_lsn
.offset
= lp
->lsn
.offset
- lp
->len
;
284 if (lsn
->file
> lp
->lsn
.file
||
285 (lsn
->file
== lp
->lsn
.file
&&
286 lsn
->offset
> lp
->lsn
.offset
- lp
->len
)) {
287 __db_err(dblp
->dbenv
,
288 "log_flush: LSN past current end-of-log");
293 * If the LSN is less than the last-sync'd LSN, we're done. Note,
294 * the last-sync LSN saved in s_lsn is the LSN of the first byte
295 * that has not yet been written to disk, so the test is <, not <=.
297 if (lsn
->file
< lp
->s_lsn
.file
||
298 (lsn
->file
== lp
->s_lsn
.file
&& lsn
->offset
< lp
->s_lsn
.offset
))
302 * We may need to write the current buffer. We have to write the
303 * current buffer if the flush LSN is greater than or equal to the
304 * first-unwritten LSN (uw_lsn). If we write the buffer, then we
305 * update the first-unwritten LSN.
307 if (lp
->b_off
!= 0 &&
308 lsn
->file
>= lp
->uw_lsn
.file
&& lsn
->offset
>= lp
->uw_lsn
.offset
)
309 if ((ret
= __log_write(dblp
, lp
->buf
, lp
->b_off
)) != 0)
313 * It's possible that this thread may never have written to this log
314 * file. Acquire a file descriptor if we don't already have one.
316 if (dblp
->lfname
!= dblp
->lp
->lsn
.file
)
317 if ((ret
= __log_newfd(dblp
)) != 0)
320 /* Sync all writes to disk. */
321 if ((ret
= __db_fsync(dblp
->lfd
)) != 0)
323 ++lp
->stat
.st_scount
;
326 * Set the last-synced LSN, the first LSN after the last record
327 * that we know is on disk.
329 lp
->s_lsn
= lp
->uw_lsn
;
/*
 * __log_fill --
 *	Copy len bytes starting at addr into the log, going through the
 *	in-memory buffer lp->buf. The static prototype earlier in the file
 *	declares it as int __log_fill(DB_LOG *, void *, u_int32_t).
 *
 *	The loop runs while len > 0:
 *	  - Fast path: when the buffer is empty (b_off == 0) and at least
 *	    one full buffer of data remains, nrec = len / sizeof(lp->buf)
 *	    whole buffers are written straight from addr with __log_write,
 *	    bypassing the copy; addr and len are advanced by the same
 *	    nrec * sizeof(lp->buf) amount.
 *	  - Otherwise: remain is the free space left in the buffer, nw is
 *	    the smaller of remain and len, and nw bytes are copied to
 *	    lp->buf + b_off; addr is advanced by nw.
 *	  - When b_off reaches sizeof(lp->buf) the full buffer is written
 *	    with __log_write.
 *
 * NOTE(review): the statements that subtract nw from len and add nw to
 * b_off, the continue after the fast path, and the error returns are not
 * visible in this incomplete listing; confirm against the complete file.
 */
336 * Write information into the log.
339 __log_fill(dblp
, addr
, len
)
349 /* Copy out the data. */
350 for (lp
= dblp
->lp
; len
> 0;) {
352 * If we're on a buffer boundary and the data is big enough,
353 * copy as many records as we can directly from the data.
355 if (lp
->b_off
== 0 && len
>= sizeof(lp
->buf
)) {
356 nrec
= len
/ sizeof(lp
->buf
);
357 if ((ret
= __log_write(dblp
,
358 addr
, nrec
* sizeof(lp
->buf
))) != 0)
360 addr
= (u_int8_t
*)addr
+ nrec
* sizeof(lp
->buf
);
361 len
-= nrec
* sizeof(lp
->buf
);
365 /* Figure out how many bytes we can copy this time. */
366 remain
= sizeof(lp
->buf
) - lp
->b_off
;
367 nw
= remain
> len
? len
: remain
;
368 memcpy(lp
->buf
+ lp
->b_off
, addr
, nw
);
369 addr
= (u_int8_t
*)addr
+ nw
;
373 /* If we fill the buffer, flush it. */
374 if (lp
->b_off
== sizeof(lp
->buf
) &&
375 (ret
= __log_write(dblp
, lp
->buf
, sizeof(lp
->buf
))) != 0)
/*
 * __log_write --
 *	Write len bytes from addr to the current log file at offset w_off.
 *	The static prototype earlier in the file declares it as
 *	int __log_write(DB_LOG *, void *, u_int32_t).
 *
 *	Visible steps:
 *	  1. If no descriptor is open (lfd == -1) or the cached file number
 *	     lfname differs from lp->lsn.file, obtain one with __log_newfd.
 *	  2. Seek to lp->w_off with __db_seek (SEEK_SET) and write with
 *	     __db_write; the byte count comes back in nw.
 *	  3. A short write (nw != len) is detected; its handling is not
 *	     visible in this listing.
 *	  4. uw_lsn is set to the current file number and w_off.
 *	  5. Statistics: len is added to st_w_bytes and st_wc_bytes, each
 *	     carrying one MEGABYTE into its st_w_mbytes / st_wc_mbytes
 *	     counter, and st_wcount is incremented.
 *
 * NOTE(review): each carry in step 5 is a single if that subtracts one
 * MEGABYTE. __log_fill can pass nrec * sizeof(lp->buf) bytes in one call,
 * so a write of two or more MEGABYTE would leave the byte counter at or
 * above MEGABYTE; a loop (or divide and modulo) would keep the pair
 * normalised. Confirm whether writes that large are possible.
 *
 * NOTE(review): the short-write test casts len to int32_t; a len above
 * INT32_MAX would compare as negative. Confirm the type of nw.
 *
 * NOTE(review): the statements under the Reset the buffer offset comment
 * that update b_off and w_off, and the error returns, are not visible in
 * this incomplete listing.
 */
383 * Write the log buffer to disk.
386 __log_write(dblp
, addr
, len
)
396 * If we haven't opened the log file yet or the current one
397 * has changed, acquire a new log file.
400 if (dblp
->lfd
== -1 || dblp
->lfname
!= lp
->lsn
.file
)
401 if ((ret
= __log_newfd(dblp
)) != 0)
405 * Seek to the offset in the file (someone may have written it
406 * since we last did).
408 if ((ret
= __db_seek(dblp
->lfd
, 0, 0, lp
->w_off
, SEEK_SET
)) != 0)
410 if ((ret
= __db_write(dblp
->lfd
, addr
, len
, &nw
)) != 0)
412 if (nw
!= (int32_t)len
)
416 * Reset the buffer offset, update the seek offset, and update the
417 * first-unwritten LSN.
421 lp
->uw_lsn
.file
= lp
->lsn
.file
;
422 lp
->uw_lsn
.offset
= lp
->w_off
;
424 /* Update written statistics. */
425 if ((lp
->stat
.st_w_bytes
+= len
) >= MEGABYTE
) {
426 lp
->stat
.st_w_bytes
-= MEGABYTE
;
427 ++lp
->stat
.st_w_mbytes
;
429 if ((lp
->stat
.st_wc_bytes
+= len
) >= MEGABYTE
) {
430 lp
->stat
.st_wc_bytes
-= MEGABYTE
;
431 ++lp
->stat
.st_wc_mbytes
;
433 ++lp
->stat
.st_wcount
;
/*
 * log_file --
 *	Copy the name of the log file that holds lsn into the caller
 *	supplied buffer namep.
 *
 *	Visible steps: under the log region lock, __log_name builds the name
 *	for lsn->file and returns it through p; after the lock is released
 *	the length is checked against len and the name is copied into namep
 *	with strcpy.
 *
 * NOTE(review): possible off-by-one. The guard rejects only
 * len < strlen(p), but strcpy writes strlen(p) + 1 bytes including the
 * terminating NUL. If len is the capacity of namep in bytes (presumably;
 * confirm against the public documentation), a buffer of exactly
 * strlen(p) bytes passes the guard and is overrun by one byte. The guard
 * should then be len < strlen(p) + 1.
 *
 * NOTE(review): the parameter declarations, the check of ret after
 * __log_name, the body of the too-small branch, and the release of p are
 * not visible in this incomplete listing.
 */
440 * Map a DB_LSN to a file name.
443 log_file(dblp
, lsn
, namep
, len
)
452 LOCK_LOGREGION(dblp
);
453 ret
= __log_name(dblp
, lsn
->file
, &p
);
454 UNLOCK_LOGREGION(dblp
);
458 /* Check to make sure there's enough room and copy the name. */
459 if (len
< strlen(p
)) {
463 (void)strcpy(namep
, p
);
/*
 * Open a descriptor for the current log file and cache it in dblp->lfd.
 *
 *	Visible steps:
 *	  1. Any descriptor already held (lfd != -1) is closed; the result
 *	     of __db_close is deliberately discarded.
 *	  2. dblp->lfname is set to the current file number lp->lsn.file.
 *	  3. __log_name turns that number into a path returned through p.
 *	  4. __db_open opens the path with DB_CREATE | DB_SEQUENTIAL and the
 *	     mode from lp->persist.mode, storing the descriptor in lfd.
 *	  5. On open failure the path and strerror(ret) are reported via
 *	     __db_err under the log_put prefix.
 *
 * NOTE(review): the function header is not visible in this incomplete
 * listing. Presumably this is __log_newfd, the only routine in the file
 * whose banner matches and which is declared as taking just DB_LOG *;
 * confirm against the complete file.
 *
 * NOTE(review): lfname is updated in step 2 before the name lookup and
 * the open have succeeded. If either fails, lfname already names the new
 * file, so callers that test only lfname (as __log_flush does) would not
 * retry. Confirm that lfd is reset to -1 after the close in step 1, in a
 * line not visible here.
 */
471 * Acquire a file descriptor for the current log file.
480 /* Close any previous file descriptor. */
481 if (dblp
->lfd
!= -1) {
482 (void)__db_close(dblp
->lfd
);
486 /* Get the path of the new file and open it. */
487 dblp
->lfname
= dblp
->lp
->lsn
.file
;
488 if ((ret
= __log_name(dblp
, dblp
->lfname
, &p
)) != 0)
490 if ((ret
= __db_open(p
,
491 DB_CREATE
| DB_SEQUENTIAL
,
492 DB_CREATE
| DB_SEQUENTIAL
,
493 dblp
->lp
->persist
.mode
, &dblp
->lfd
)) != 0)
494 __db_err(dblp
->dbenv
,
495 "log_put: %s: %s", p
, strerror(ret
));
/*
 * __log_name --
 *	Build the path of log file number filenumber and return it through
 *	namep. The PUBLIC line below gives the prototype
 *	int __log_name(DB_LOG *, int, char **).
 *
 *	The file component is formatted into a local buffer of
 *	sizeof(LFNAME) + 10 bytes using LFNAME as the printf-style format
 *	and filenumber as its argument; snprintf is bounded by sizeof(name).
 *	The result of __db_appname (called with DB_APP_LOG, dblp->dir and
 *	the formatted name) is returned directly.
 *
 * NOTE(review): the snprintf result is discarded, so a formatted name
 * longer than the buffer would be truncated silently. The 10 bytes of
 * slack presumably cover the widest expansion of the number; confirm
 * against the definition of LFNAME.
 *
 * NOTE(review): the return type line, parameter declarations and braces
 * are not visible in this incomplete listing.
 */
502 * Return the log name for a particular file.
504 * PUBLIC: int __log_name __P((DB_LOG *, int, char **));
507 __log_name(dblp
, filenumber
, namep
)
512 char name
[sizeof(LFNAME
) + 10];
514 (void)snprintf(name
, sizeof(name
), LFNAME
, filenumber
);
515 return (__db_appname(dblp
->dbenv
,
516 DB_APP_LOG
, dblp
->dir
, name
, NULL
, namep
));