2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
10 static const char sccsid
[] = "@(#)log_put.c 10.35 (Sleepycat) 5/6/98";
13 #ifndef NO_SYSTEM_INCLUDES
14 #include <sys/types.h>
27 #include "common_ext.h"
/*
 * Forward declarations for the file-local (static) helpers used in this
 * file; each parameter list is wrapped in the __P(( )) macro.
 *
 * NOTE(review): __P is presumably the usual ANSI/K&R prototype-compatibility
 * macro -- confirm against the project headers.
 */
29 static int __log_fill
__P((DB_LOG
*, DB_LSN
*, void *, u_int32_t
));
30 static int __log_flush
__P((DB_LOG
*, const DB_LSN
*));
31 static int __log_newfd
__P((DB_LOG
*));
32 static int __log_putr
__P((DB_LOG
*, DB_LSN
*, const DBT
*, u_int32_t
));
33 static int __log_write
__P((DB_LOG
*, void *, u_int32_t
));
/*
 * log_put: flag-checking wrapper around __log_put().
 *
 * Visible steps:
 *   - flags are validated against OKFLAGS (DB_CHECKPOINT | DB_FLUSH |
 *     DB_CURLSN) through __db_fchk();
 *   - one path reports an error through __db_ferr(dbenv, "log_put", 1);
 *   - the record is handed to __log_put() and its result stored in ret;
 *   - UNLOCK_LOGREGION(dblp) is called after __log_put() returns.
 *
 * NOTE(review): this listing has dropped lines -- the parameter
 * declarations, the condition guarding the __db_ferr() call, the lock
 * acquisition that presumably pairs with UNLOCK_LOGREGION(), and the final
 * return are not visible.  Consult the complete file before editing.
 */
40 log_put(dblp
, lsn
, dbt
, flags
)
48 /* Validate arguments. */
49 #define OKFLAGS (DB_CHECKPOINT | DB_FLUSH | DB_CURLSN)
52 __db_fchk(dblp
->dbenv
, "log_put", flags
, OKFLAGS
)) != 0)
61 return (__db_ferr(dblp
->dbenv
, "log_put", 1));
66 ret
= __log_put(dblp
, lsn
, dbt
, flags
);
67 UNLOCK_LOGREGION(dblp
);
73 * Write a log record; internal version.
75 * PUBLIC: int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
/*
 * __log_put: internal record writer called by log_put().
 *
 * Visible sequence:
 *   1. If DB_CURLSN is set (LF_ISSET), copy lp->lsn into *lsn.
 *   2. If lp->lsn.offset + sizeof(HDR) + dbt->size exceeds
 *      lp->persist.lg_max, switch files: a second size test involving
 *      sizeof(LOGP) reports "record larger than maximum file size", the
 *      log is flushed with __log_flush(dblp, NULL), and the old offset is
 *      saved in lastoff.
 *   3. Copy lp->lsn into *lsn.
 *   4. When lp->lsn.offset is 0, write lp->persist (sizeof(LOGP) bytes) as
 *      the first record through __log_putr(), passing 0 when lastoff is 0
 *      and lastoff - lp->len otherwise, then refresh *lsn.
 *   5. Write the caller's record through __log_putr(), passing
 *      lp->lsn.offset - lp->len as the previous-record value.
 *   6. When flags == DB_CHECKPOINT, store *lsn in lp->chkpt_lsn and call
 *      __log_register_log() with LOG_CHECKPOINT for every entry on the
 *      dblp->lp->fq queue (name from R_ADDR(dblp, fnp->name_off), id from
 *      fnp->ufid with size DB_FILE_ID_LEN).
 *   7. When flags is DB_FLUSH or DB_CHECKPOINT, call __log_flush(dblp, NULL).
 *   8. When flags == DB_CHECKPOINT, store the current time in lp->chkpt and
 *      zero lp->stat.st_wc_bytes and lp->stat.st_wc_mbytes.
 *
 * NOTE(review): DB_CURLSN is tested as a bit (LF_ISSET) while DB_FLUSH and
 * DB_CHECKPOINT are tested with ==, so a flags value combining bits would
 * match neither equality test -- confirm that combinations are rejected by
 * the caller.
 * NOTE(review): this listing has dropped lines (declarations, the
 * assignment of lp, error returns, the statements that advance to the new
 * file, closing braces); the description covers only what is visible.
 */
78 __log_put(dblp
, lsn
, dbt
, flags
)
94 * If the application just wants to know where we are, fill in
95 * the information. Currently used by the transaction manager
96 * to avoid writing TXN_begin records.
98 if (LF_ISSET(DB_CURLSN
)) {
99 lsn
->file
= lp
->lsn
.file
;
100 lsn
->offset
= lp
->lsn
.offset
;
104 /* If this information won't fit in the file, swap files. */
105 if (lp
->lsn
.offset
+ sizeof(HDR
) + dbt
->size
> lp
->persist
.lg_max
) {
107 sizeof(LOGP
) + dbt
->size
> lp
->persist
.lg_max
) {
108 __db_err(dblp
->dbenv
,
109 "log_put: record larger than maximum file size");
114 if ((ret
= __log_flush(dblp
, NULL
)) != 0)
118 * Save the last known offset from the previous file, we'll
119 * need it to initialize the persistent header information.
121 lastoff
= lp
->lsn
.offset
;
123 /* Point the current LSN to the new file. */
127 /* Reset the file write offset. */
132 /* Initialize the LSN information returned to the user. */
133 lsn
->file
= lp
->lsn
.file
;
134 lsn
->offset
= lp
->lsn
.offset
;
137 * Insert persistent information as the first record in every file.
138 * Note that the previous length is wrong for the very first record
139 * of the log, but that's okay, we check for it during retrieval.
141 if (lp
->lsn
.offset
== 0) {
142 t
.data
= &lp
->persist
;
143 t
.size
= sizeof(LOGP
);
144 if ((ret
= __log_putr(dblp
, lsn
,
145 &t
, lastoff
== 0 ? 0 : lastoff
- lp
->len
)) != 0)
148 /* Update the LSN information returned to the user. */
149 lsn
->file
= lp
->lsn
.file
;
150 lsn
->offset
= lp
->lsn
.offset
;
153 /* Write the application's log record. */
154 if ((ret
= __log_putr(dblp
, lsn
, dbt
, lp
->lsn
.offset
- lp
->len
)) != 0)
158 * On a checkpoint, we:
159 * Put out the checkpoint record (above).
160 * Save the LSN of the checkpoint in the shared region.
161 * Append the set of file name information into the log.
163 if (flags
== DB_CHECKPOINT
) {
164 lp
->chkpt_lsn
= *lsn
;
166 for (fnp
= SH_TAILQ_FIRST(&dblp
->lp
->fq
, __fname
);
167 fnp
!= NULL
; fnp
= SH_TAILQ_NEXT(fnp
, q
, __fname
)) {
168 memset(&t
, 0, sizeof(t
));
169 t
.data
= R_ADDR(dblp
, fnp
->name_off
);
170 t
.size
= strlen(t
.data
) + 1;
171 memset(&fid_dbt
, 0, sizeof(fid_dbt
));
172 fid_dbt
.data
= fnp
->ufid
;
173 fid_dbt
.size
= DB_FILE_ID_LEN
;
174 if ((ret
= __log_register_log(dblp
, NULL
, &r_unused
, 0,
175 LOG_CHECKPOINT
, &t
, &fid_dbt
, fnp
->id
, fnp
->s_type
))
182 * On a checkpoint or when flush is requested, we:
183 * Flush the current buffer contents to disk.
184 * Sync the log to disk.
186 if (flags
== DB_FLUSH
|| flags
== DB_CHECKPOINT
)
187 if ((ret
= __log_flush(dblp
, NULL
)) != 0)
191 * On a checkpoint, we:
192 * Save the time the checkpoint was written.
193 * Reset the bytes written since the last checkpoint.
195 if (flags
== DB_CHECKPOINT
) {
196 (void)time(&lp
->chkpt
);
197 lp
->stat
.st_wc_bytes
= lp
->stat
.st_wc_mbytes
= 0;
204 * Actually put a record into the log.
/*
 * __log_putr: append one record (header followed by payload) to the log.
 *
 * Visible steps:
 *   - hdr.len is set to sizeof(HDR) + dbt->size and hdr.cksum to
 *     __ham_func4(dbt->data, dbt->size);
 *   - the header is written with __log_fill(); on success lp->len is set
 *     to sizeof(HDR) and lp->lsn.offset advances by sizeof(HDR);
 *   - the payload is written with __log_fill(); on success lp->len and
 *     lp->lsn.offset both grow by dbt->size.
 * After both writes lp->len therefore equals the total record length.
 *
 * NOTE(review): the statement that stores prev into the header is not
 * visible in this listing (presumably a dropped line); nor are the
 * declarations and error returns.
 */
207 __log_putr(dblp
, lsn
, dbt
, prev
)
220 * Initialize the header. If we just switched files, lsn.offset will
221 * be 0, and what we really want is the offset of the previous record
222 * in the previous file. Fortunately, prev holds the value we want.
225 hdr
.len
= sizeof(HDR
) + dbt
->size
;
226 hdr
.cksum
= __ham_func4(dbt
->data
, dbt
->size
);
228 if ((ret
= __log_fill(dblp
, lsn
, &hdr
, sizeof(HDR
))) != 0)
230 lp
->len
= sizeof(HDR
);
231 lp
->lsn
.offset
+= sizeof(HDR
);
233 if ((ret
= __log_fill(dblp
, lsn
, dbt
->data
, dbt
->size
)) != 0)
235 lp
->len
+= dbt
->size
;
236 lp
->lsn
.offset
+= dbt
->size
;
242 * Write all records less than or equal to the specified LSN.
/*
 * Body of the locking wrapper around __log_flush(): take the log region
 * lock, call __log_flush(dblp, lsn), store its result in ret, release the
 * lock.
 *
 * NOTE(review): the function's signature line is missing from this
 * listing; presumably this is the public log_flush() entry point -- confirm
 * against the complete file.
 */
251 LOCK_LOGREGION(dblp
);
252 ret
= __log_flush(dblp
, lsn
);
253 UNLOCK_LOGREGION(dblp
);
259 * Write all records less than or equal to the specified LSN; internal version.
/*
 * __log_flush: make the log durable up to a given LSN.
 *
 * Visible sequence:
 *   1. A temporary t_lsn is filled with lp->lsn.file and
 *      lp->lsn.offset - lp->len (the start of the last record written).
 *   2. An lsn beyond that point (later file, or same file with a larger
 *      offset) is reported as "log_flush: LSN past current end-of-log".
 *   3. An lsn at or before lp->s_lsn (the last-synced LSN) is tested for;
 *      the action taken is on a line not visible here.
 *   4. If the buffer holds data (lp->b_off != 0) and the lsn test against
 *      lp->f_lsn passes, the buffer is written with __log_write().
 *   5. If dblp->lfname differs from the current log file number, a
 *      descriptor is acquired with __log_newfd().
 *   6. The file is synced with __db_fsync() and lp->stat.st_scount is
 *      incremented.
 *   7. lp->s_lsn is set from lp->f_lsn; an offset of 0 is special-cased
 *      using lp->persist.lg_max.
 *
 * NOTE(review): the test in step 4 compares file and offset independently
 * (file >= f_lsn.file && offset >= f_lsn.offset) rather than ordering the
 * LSNs by file first and offset second.  An lsn in a later file with a
 * smaller offset than f_lsn.offset would fail the test -- confirm whether
 * that state is reachable.
 * NOTE(review): this listing has dropped lines (the NULL-lsn test, error
 * returns, the remaining s_lsn adjustments, closing braces).
 */
263 __log_flush(dblp
, lsn
)
275 * If no LSN specified, flush the entire log by setting the flush LSN
276 * to the last LSN written in the log. Otherwise, check that the LSN
277 * isn't a non-existent record for the log.
280 t_lsn
.file
= lp
->lsn
.file
;
281 t_lsn
.offset
= lp
->lsn
.offset
- lp
->len
;
284 if (lsn
->file
> lp
->lsn
.file
||
285 (lsn
->file
== lp
->lsn
.file
&&
286 lsn
->offset
> lp
->lsn
.offset
- lp
->len
)) {
287 __db_err(dblp
->dbenv
,
288 "log_flush: LSN past current end-of-log");
293 * If the LSN is less than the last-sync'd LSN, we're done. Note,
294 * the last-sync LSN saved in s_lsn is the LSN of the first byte
295 * we absolutely know has been written to disk, so the test is <=.
297 if (lsn
->file
< lp
->s_lsn
.file
||
298 (lsn
->file
== lp
->s_lsn
.file
&& lsn
->offset
<= lp
->s_lsn
.offset
))
302 * We may need to write the current buffer. We have to write the
303 * current buffer if the flush LSN is greater than or equal to the
304 * buffer's starting LSN.
307 if (lp
->b_off
!= 0 &&
308 lsn
->file
>= lp
->f_lsn
.file
&& lsn
->offset
>= lp
->f_lsn
.offset
) {
309 if ((ret
= __log_write(dblp
, lp
->buf
, lp
->b_off
)) != 0)
317 * It's possible that this thread may never have written to this log
318 * file. Acquire a file descriptor if we don't already have one.
320 if (dblp
->lfname
!= dblp
->lp
->lsn
.file
)
321 if ((ret
= __log_newfd(dblp
)) != 0)
324 /* Sync all writes to disk. */
325 if ((ret
= __db_fsync(dblp
->lfd
)) != 0)
327 ++lp
->stat
.st_scount
;
330 * Set the last-synced LSN, using the LSN of the current buffer. If
331 * the current buffer was flushed, we know the LSN of the first byte
332 * of the buffer is on disk, otherwise, we only know that the LSN of
333 * the record before the one beginning the current buffer is on disk.
335 lp
->s_lsn
= lp
->f_lsn
;
337 if (lp
->s_lsn
.offset
== 0) {
339 lp
->s_lsn
.offset
= lp
->persist
.lg_max
;
348 * Write information into the log.
/*
 * __log_fill: copy len bytes from addr into the log, going through the
 * in-memory buffer lp->buf where needed.
 *
 * The loop runs while len > 0:
 *   - When the buffer is empty (lp->b_off == 0) and at least
 *     sizeof(lp->buf) bytes remain, nrec = len / sizeof(lp->buf) whole
 *     buffer-sized chunks are written straight from addr with
 *     __log_write(); addr and len are advanced by that amount.
 *   - Otherwise the smaller of the free buffer space and len is copied
 *     with memcpy() into lp->buf at lp->b_off, and addr is advanced.
 *   - When lp->b_off reaches sizeof(lp->buf) the buffer is written with
 *     __log_write().
 *
 * NOTE(review): this listing has dropped lines -- the statement recording
 * the buffer's starting LSN (described in the first in-loop comment), the
 * updates to len and lp->b_off after the memcpy(), and the error returns
 * are not visible.
 */
351 __log_fill(dblp
, lsn
, addr
, len
)
362 /* Copy out the data. */
363 for (lp
= dblp
->lp
; len
> 0;) {
365 * If we're beginning a new buffer, note the user LSN to which
366 * the first byte of the buffer belongs. We have to know this
367 * when flushing the buffer so that we know if the in-memory
368 * buffer needs to be flushed.
374 * If we're on a buffer boundary and the data is big enough,
375 * copy as many records as we can directly from the data.
377 if (lp
->b_off
== 0 && len
>= sizeof(lp
->buf
)) {
378 nrec
= len
/ sizeof(lp
->buf
);
379 if ((ret
= __log_write(dblp
,
380 addr
, nrec
* sizeof(lp
->buf
))) != 0)
382 addr
= (u_int8_t
*)addr
+ nrec
* sizeof(lp
->buf
);
383 len
-= nrec
* sizeof(lp
->buf
);
387 /* Figure out how many bytes we can copy this time. */
388 remain
= sizeof(lp
->buf
) - lp
->b_off
;
389 nw
= remain
> len
? len
: remain
;
390 memcpy(lp
->buf
+ lp
->b_off
, addr
, nw
);
391 addr
= (u_int8_t
*)addr
+ nw
;
395 /* If we fill the buffer, flush it. */
396 if (lp
->b_off
== sizeof(lp
->buf
)) {
398 __log_write(dblp
, lp
->buf
, sizeof(lp
->buf
))) != 0)
408 * Write the log buffer to disk.
/*
 * __log_write: write len bytes from addr to the current log file.
 *
 * Visible sequence:
 *   1. If no descriptor is open (dblp->lfd == -1) or dblp->lfname is not
 *      the current log file number, acquire one with __log_newfd().
 *   2. Seek to lp->w_off with __db_seek(..., SEEK_SET).
 *   3. Write with __db_write(); the byte count comes back in nw and is
 *      compared with (int32_t)len to detect a short write.
 *   4. Add len to both lp->stat.st_w_bytes and lp->stat.st_wc_bytes,
 *      carrying into the matching *_mbytes counter once a byte counter
 *      reaches MEGABYTE, then increment lp->stat.st_wcount.
 *
 * NOTE(review): each carry subtracts a single MEGABYTE and bumps the
 * megabyte counter once; if one call can pass a len of two megabytes or
 * more, the byte counter would stay at or above MEGABYTE -- confirm the
 * upper bound on len.
 * NOTE(review): this listing has dropped lines (declarations, error
 * returns, short-write handling, and the updates announced by the
 * "Reset the buffer offset" comment).
 */
411 __log_write(dblp
, addr
, len
)
421 * If we haven't opened the log file yet or the current one
422 * has changed, acquire a new log file.
425 if (dblp
->lfd
== -1 || dblp
->lfname
!= lp
->lsn
.file
)
426 if ((ret
= __log_newfd(dblp
)) != 0)
430 * Seek to the offset in the file (someone may have written it
431 * since we last did).
433 if ((ret
= __db_seek(dblp
->lfd
, 0, 0, lp
->w_off
, 0, SEEK_SET
)) != 0)
435 if ((ret
= __db_write(dblp
->lfd
, addr
, len
, &nw
)) != 0)
437 if (nw
!= (int32_t)len
)
440 /* Reset the buffer offset and update the seek offset. */
443 /* Update written statistics. */
444 if ((lp
->stat
.st_w_bytes
+= len
) >= MEGABYTE
) {
445 lp
->stat
.st_w_bytes
-= MEGABYTE
;
446 ++lp
->stat
.st_w_mbytes
;
448 if ((lp
->stat
.st_wc_bytes
+= len
) >= MEGABYTE
) {
449 lp
->stat
.st_wc_bytes
-= MEGABYTE
;
450 ++lp
->stat
.st_wc_mbytes
;
452 ++lp
->stat
.st_wcount
;
459 * Map a DB_LSN to a file name.
/*
 * log_file: copy the file name for the log file containing lsn into the
 * caller's buffer.
 *
 * Visible steps:
 *   - under the log region lock, __log_name(dblp, lsn->file, &p) produces
 *     the name in p;
 *   - the buffer length len is compared with strlen(p) + 1 so the name
 *     and its terminating NUL are known to fit;
 *   - the name is copied into namep with strcpy().
 *
 * NOTE(review): this listing has dropped lines -- the handling of a
 * __log_name() failure, the action taken when the buffer is too small,
 * any release of p, and the return are not visible.
 */
462 log_file(dblp
, lsn
, namep
, len
)
471 LOCK_LOGREGION(dblp
);
472 ret
= __log_name(dblp
, lsn
->file
, &p
);
473 UNLOCK_LOGREGION(dblp
);
477 /* Check to make sure there's enough room and copy the name. */
478 if (len
< strlen(p
) + 1) {
482 (void)strcpy(namep
, p
);
490 * Acquire a file descriptor for the current log file.
/*
 * Body of the routine that (re)opens the descriptor for the current log
 * file.
 *
 * Visible steps:
 *   - an already-open descriptor (dblp->lfd != -1) is closed with
 *     __db_close(), ignoring its result;
 *   - dblp->lfname is set to the current file number dblp->lp->lsn.file;
 *   - the path is built with __log_name() and opened with __db_open(),
 *     passing DB_CREATE | DB_SEQUENTIAL for both flag arguments and
 *     dblp->lp->persist.mode as the mode, storing the result in dblp->lfd;
 *   - an open failure is reported as "log_put: <path>: <strerror(ret)>".
 *
 * NOTE(review): the signature line is missing from this listing;
 * presumably this is __log_newfd(), per the static prototype and the call
 * sites -- confirm against the complete file.  The reset of dblp->lfd
 * after closing, any release of p, and the returns are also not visible.
 */
499 /* Close any previous file descriptor. */
500 if (dblp
->lfd
!= -1) {
501 (void)__db_close(dblp
->lfd
);
505 /* Get the path of the new file and open it. */
506 dblp
->lfname
= dblp
->lp
->lsn
.file
;
507 if ((ret
= __log_name(dblp
, dblp
->lfname
, &p
)) != 0)
509 if ((ret
= __db_open(p
,
510 DB_CREATE
| DB_SEQUENTIAL
,
511 DB_CREATE
| DB_SEQUENTIAL
,
512 dblp
->lp
->persist
.mode
, &dblp
->lfd
)) != 0)
513 __db_err(dblp
->dbenv
,
514 "log_put: %s: %s", p
, strerror(ret
));
521 * Return the log name for a particular file.
523 * PUBLIC: int __log_name __P((DB_LOG *, int, char **));
/*
 * __log_name: build the path name for log file number filenumber.
 *
 * The base name is formatted into a local buffer of sizeof(LFNAME) + 10
 * bytes with snprintf(), using LFNAME as the format string and filenumber
 * as its argument.  The result is passed to __db_appname() together with
 * DB_APP_LOG and dblp->dir; __db_appname()'s return value is returned
 * unchanged and the resulting path is delivered through namep.
 *
 * NOTE(review): ownership of the string stored in *namep is not visible
 * here -- check __db_appname() before assuming who releases it.
 */
526 __log_name(dblp
, filenumber
, namep
)
531 char name
[sizeof(LFNAME
) + 10];
533 (void)snprintf(name
, sizeof(name
), LFNAME
, filenumber
);
534 return (__db_appname(dblp
->dbenv
,
535 DB_APP_LOG
, dblp
->dir
, name
, 0, NULL
, namep
));