Update.
[glibc.git] / db2 / log / log_put.c
blobd00e7dde2107a7f3f0b734ca33e27909f179ac4f
1 /*-
2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997, 1998
5 * Sleepycat Software. All rights reserved.
6 */
7 #include "config.h"
9 #ifndef lint
10 static const char sccsid[] = "@(#)log_put.c 10.35 (Sleepycat) 5/6/98";
11 #endif /* not lint */
13 #ifndef NO_SYSTEM_INCLUDES
14 #include <sys/types.h>
16 #include <errno.h>
17 #include <string.h>
18 #include <time.h>
19 #include <unistd.h>
20 #endif
22 #include "db_int.h"
23 #include "shqueue.h"
24 #include "db_page.h"
25 #include "log.h"
26 #include "hash.h"
27 #include "common_ext.h"
29 static int __log_fill __P((DB_LOG *, DB_LSN *, void *, u_int32_t));
30 static int __log_flush __P((DB_LOG *, const DB_LSN *));
31 static int __log_newfd __P((DB_LOG *));
32 static int __log_putr __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
33 static int __log_write __P((DB_LOG *, void *, u_int32_t));
36 * log_put --
37 * Write a log record.
39 int
40 log_put(dblp, lsn, dbt, flags)
41 DB_LOG *dblp;
42 DB_LSN *lsn;
43 const DBT *dbt;
44 u_int32_t flags;
46 int ret;
48 /* Validate arguments. */
49 #define OKFLAGS (DB_CHECKPOINT | DB_FLUSH | DB_CURLSN)
50 if (flags != 0) {
51 if ((ret =
52 __db_fchk(dblp->dbenv, "log_put", flags, OKFLAGS)) != 0)
53 return (ret);
54 switch (flags) {
55 case DB_CHECKPOINT:
56 case DB_CURLSN:
57 case DB_FLUSH:
58 case 0:
59 break;
60 default:
61 return (__db_ferr(dblp->dbenv, "log_put", 1));
65 LOCK_LOGREGION(dblp);
66 ret = __log_put(dblp, lsn, dbt, flags);
67 UNLOCK_LOGREGION(dblp);
68 return (ret);
72 * __log_put --
73 * Write a log record; internal version.
75 * PUBLIC: int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, u_int32_t));
77 int
78 __log_put(dblp, lsn, dbt, flags)
79 DB_LOG *dblp;
80 DB_LSN *lsn;
81 const DBT *dbt;
82 u_int32_t flags;
84 DBT fid_dbt, t;
85 DB_LSN r_unused;
86 FNAME *fnp;
87 LOG *lp;
88 u_int32_t lastoff;
89 int ret;
91 lp = dblp->lp;
94 * If the application just wants to know where we are, fill in
95 * the information. Currently used by the transaction manager
96 * to avoid writing TXN_begin records.
98 if (LF_ISSET(DB_CURLSN)) {
99 lsn->file = lp->lsn.file;
100 lsn->offset = lp->lsn.offset;
101 return (0);
104 /* If this information won't fit in the file, swap files. */
105 if (lp->lsn.offset + sizeof(HDR) + dbt->size > lp->persist.lg_max) {
106 if (sizeof(HDR) +
107 sizeof(LOGP) + dbt->size > lp->persist.lg_max) {
108 __db_err(dblp->dbenv,
109 "log_put: record larger than maximum file size");
110 return (EINVAL);
113 /* Flush the log. */
114 if ((ret = __log_flush(dblp, NULL)) != 0)
115 return (ret);
118 * Save the last known offset from the previous file, we'll
119 * need it to initialize the persistent header information.
121 lastoff = lp->lsn.offset;
123 /* Point the current LSN to the new file. */
124 ++lp->lsn.file;
125 lp->lsn.offset = 0;
127 /* Reset the file write offset. */
128 lp->w_off = 0;
129 } else
130 lastoff = 0;
132 /* Initialize the LSN information returned to the user. */
133 lsn->file = lp->lsn.file;
134 lsn->offset = lp->lsn.offset;
137 * Insert persistent information as the first record in every file.
138 * Note that the previous length is wrong for the very first record
139 * of the log, but that's okay, we check for it during retrieval.
141 if (lp->lsn.offset == 0) {
142 t.data = &lp->persist;
143 t.size = sizeof(LOGP);
144 if ((ret = __log_putr(dblp, lsn,
145 &t, lastoff == 0 ? 0 : lastoff - lp->len)) != 0)
146 return (ret);
148 /* Update the LSN information returned to the user. */
149 lsn->file = lp->lsn.file;
150 lsn->offset = lp->lsn.offset;
153 /* Write the application's log record. */
154 if ((ret = __log_putr(dblp, lsn, dbt, lp->lsn.offset - lp->len)) != 0)
155 return (ret);
158 * On a checkpoint, we:
159 * Put out the checkpoint record (above).
160 * Save the LSN of the checkpoint in the shared region.
161 * Append the set of file name information into the log.
163 if (flags == DB_CHECKPOINT) {
164 lp->chkpt_lsn = *lsn;
166 for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
167 fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
168 memset(&t, 0, sizeof(t));
169 t.data = R_ADDR(dblp, fnp->name_off);
170 t.size = strlen(t.data) + 1;
171 memset(&fid_dbt, 0, sizeof(fid_dbt));
172 fid_dbt.data = fnp->ufid;
173 fid_dbt.size = DB_FILE_ID_LEN;
174 if ((ret = __log_register_log(dblp, NULL, &r_unused, 0,
175 LOG_CHECKPOINT, &t, &fid_dbt, fnp->id, fnp->s_type))
176 != 0)
177 return (ret);
182 * On a checkpoint or when flush is requested, we:
183 * Flush the current buffer contents to disk.
184 * Sync the log to disk.
186 if (flags == DB_FLUSH || flags == DB_CHECKPOINT)
187 if ((ret = __log_flush(dblp, NULL)) != 0)
188 return (ret);
191 * On a checkpoint, we:
192 * Save the time the checkpoint was written.
193 * Reset the bytes written since the last checkpoint.
195 if (flags == DB_CHECKPOINT) {
196 (void)time(&lp->chkpt);
197 lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;
199 return (0);
203 * __log_putr --
204 * Actually put a record into the log.
206 static int
207 __log_putr(dblp, lsn, dbt, prev)
208 DB_LOG *dblp;
209 DB_LSN *lsn;
210 const DBT *dbt;
211 u_int32_t prev;
213 HDR hdr;
214 LOG *lp;
215 int ret;
217 lp = dblp->lp;
220 * Initialize the header. If we just switched files, lsn.offset will
221 * be 0, and what we really want is the offset of the previous record
222 * in the previous file. Fortunately, prev holds the value we want.
224 hdr.prev = prev;
225 hdr.len = sizeof(HDR) + dbt->size;
226 hdr.cksum = __ham_func4(dbt->data, dbt->size);
228 if ((ret = __log_fill(dblp, lsn, &hdr, sizeof(HDR))) != 0)
229 return (ret);
230 lp->len = sizeof(HDR);
231 lp->lsn.offset += sizeof(HDR);
233 if ((ret = __log_fill(dblp, lsn, dbt->data, dbt->size)) != 0)
234 return (ret);
235 lp->len += dbt->size;
236 lp->lsn.offset += dbt->size;
237 return (0);
241 * log_flush --
242 * Write all records less than or equal to the specified LSN.
245 log_flush(dblp, lsn)
246 DB_LOG *dblp;
247 const DB_LSN *lsn;
249 int ret;
251 LOCK_LOGREGION(dblp);
252 ret = __log_flush(dblp, lsn);
253 UNLOCK_LOGREGION(dblp);
254 return (ret);
258 * __log_flush --
259 * Write all records less than or equal to the specified LSN; internal
260 * version.
262 static int
263 __log_flush(dblp, lsn)
264 DB_LOG *dblp;
265 const DB_LSN *lsn;
267 DB_LSN t_lsn;
268 LOG *lp;
269 int current, ret;
271 ret = 0;
272 lp = dblp->lp;
275 * If no LSN specified, flush the entire log by setting the flush LSN
276 * to the last LSN written in the log. Otherwise, check that the LSN
277 * isn't a non-existent record for the log.
279 if (lsn == NULL) {
280 t_lsn.file = lp->lsn.file;
281 t_lsn.offset = lp->lsn.offset - lp->len;
282 lsn = &t_lsn;
283 } else
284 if (lsn->file > lp->lsn.file ||
285 (lsn->file == lp->lsn.file &&
286 lsn->offset > lp->lsn.offset - lp->len)) {
287 __db_err(dblp->dbenv,
288 "log_flush: LSN past current end-of-log");
289 return (EINVAL);
293 * If the LSN is less than the last-sync'd LSN, we're done. Note,
294 * the last-sync LSN saved in s_lsn is the LSN of the first byte
295 * we absolutely know has been written to disk, so the test is <=.
297 if (lsn->file < lp->s_lsn.file ||
298 (lsn->file == lp->s_lsn.file && lsn->offset <= lp->s_lsn.offset))
299 return (0);
302 * We may need to write the current buffer. We have to write the
303 * current buffer if the flush LSN is greater than or equal to the
304 * buffer's starting LSN.
306 current = 0;
307 if (lp->b_off != 0 &&
308 lsn->file >= lp->f_lsn.file && lsn->offset >= lp->f_lsn.offset) {
309 if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
310 return (ret);
312 lp->b_off = 0;
313 current = 1;
317 * It's possible that this thread may never have written to this log
318 * file. Acquire a file descriptor if we don't already have one.
320 if (dblp->lfname != dblp->lp->lsn.file)
321 if ((ret = __log_newfd(dblp)) != 0)
322 return (ret);
324 /* Sync all writes to disk. */
325 if ((ret = __db_fsync(dblp->lfd)) != 0)
326 return (ret);
327 ++lp->stat.st_scount;
330 * Set the last-synced LSN, using the LSN of the current buffer. If
331 * the current buffer was flushed, we know the LSN of the first byte
332 * of the buffer is on disk, otherwise, we only know that the LSN of
333 * the record before the one beginning the current buffer is on disk.
335 lp->s_lsn = lp->f_lsn;
336 if (!current)
337 if (lp->s_lsn.offset == 0) {
338 --lp->s_lsn.file;
339 lp->s_lsn.offset = lp->persist.lg_max;
340 } else
341 --lp->s_lsn.offset;
343 return (0);
347 * __log_fill --
348 * Write information into the log.
350 static int
351 __log_fill(dblp, lsn, addr, len)
352 DB_LOG *dblp;
353 DB_LSN *lsn;
354 void *addr;
355 u_int32_t len;
357 LOG *lp;
358 u_int32_t nrec;
359 size_t nw, remain;
360 int ret;
362 /* Copy out the data. */
363 for (lp = dblp->lp; len > 0;) {
365 * If we're beginning a new buffer, note the user LSN to which
366 * the first byte of the buffer belongs. We have to know this
367 * when flushing the buffer so that we know if the in-memory
368 * buffer needs to be flushed.
370 if (lp->b_off == 0)
371 lp->f_lsn = *lsn;
374 * If we're on a buffer boundary and the data is big enough,
375 * copy as many records as we can directly from the data.
377 if (lp->b_off == 0 && len >= sizeof(lp->buf)) {
378 nrec = len / sizeof(lp->buf);
379 if ((ret = __log_write(dblp,
380 addr, nrec * sizeof(lp->buf))) != 0)
381 return (ret);
382 addr = (u_int8_t *)addr + nrec * sizeof(lp->buf);
383 len -= nrec * sizeof(lp->buf);
384 continue;
387 /* Figure out how many bytes we can copy this time. */
388 remain = sizeof(lp->buf) - lp->b_off;
389 nw = remain > len ? len : remain;
390 memcpy(lp->buf + lp->b_off, addr, nw);
391 addr = (u_int8_t *)addr + nw;
392 len -= nw;
393 lp->b_off += nw;
395 /* If we fill the buffer, flush it. */
396 if (lp->b_off == sizeof(lp->buf)) {
397 if ((ret =
398 __log_write(dblp, lp->buf, sizeof(lp->buf))) != 0)
399 return (ret);
400 lp->b_off = 0;
403 return (0);
407 * __log_write --
408 * Write the log buffer to disk.
410 static int
411 __log_write(dblp, addr, len)
412 DB_LOG *dblp;
413 void *addr;
414 u_int32_t len;
416 LOG *lp;
417 ssize_t nw;
418 int ret;
421 * If we haven't opened the log file yet or the current one
422 * has changed, acquire a new log file.
424 lp = dblp->lp;
425 if (dblp->lfd == -1 || dblp->lfname != lp->lsn.file)
426 if ((ret = __log_newfd(dblp)) != 0)
427 return (ret);
430 * Seek to the offset in the file (someone may have written it
431 * since we last did).
433 if ((ret = __db_seek(dblp->lfd, 0, 0, lp->w_off, 0, SEEK_SET)) != 0)
434 return (ret);
435 if ((ret = __db_write(dblp->lfd, addr, len, &nw)) != 0)
436 return (ret);
437 if (nw != (int32_t)len)
438 return (EIO);
440 /* Reset the buffer offset and update the seek offset. */
441 lp->w_off += len;
443 /* Update written statistics. */
444 if ((lp->stat.st_w_bytes += len) >= MEGABYTE) {
445 lp->stat.st_w_bytes -= MEGABYTE;
446 ++lp->stat.st_w_mbytes;
448 if ((lp->stat.st_wc_bytes += len) >= MEGABYTE) {
449 lp->stat.st_wc_bytes -= MEGABYTE;
450 ++lp->stat.st_wc_mbytes;
452 ++lp->stat.st_wcount;
454 return (0);
458 * log_file --
459 * Map a DB_LSN to a file name.
462 log_file(dblp, lsn, namep, len)
463 DB_LOG *dblp;
464 const DB_LSN *lsn;
465 char *namep;
466 size_t len;
468 int ret;
469 char *p;
471 LOCK_LOGREGION(dblp);
472 ret = __log_name(dblp, lsn->file, &p);
473 UNLOCK_LOGREGION(dblp);
474 if (ret != 0)
475 return (ret);
477 /* Check to make sure there's enough room and copy the name. */
478 if (len < strlen(p) + 1) {
479 *namep = '\0';
480 return (ENOMEM);
482 (void)strcpy(namep, p);
483 __db_free(p);
485 return (0);
489 * __log_newfd --
490 * Acquire a file descriptor for the current log file.
492 static int
493 __log_newfd(dblp)
494 DB_LOG *dblp;
496 int ret;
497 char *p;
499 /* Close any previous file descriptor. */
500 if (dblp->lfd != -1) {
501 (void)__db_close(dblp->lfd);
502 dblp->lfd = -1;
505 /* Get the path of the new file and open it. */
506 dblp->lfname = dblp->lp->lsn.file;
507 if ((ret = __log_name(dblp, dblp->lfname, &p)) != 0)
508 return (ret);
509 if ((ret = __db_open(p,
510 DB_CREATE | DB_SEQUENTIAL,
511 DB_CREATE | DB_SEQUENTIAL,
512 dblp->lp->persist.mode, &dblp->lfd)) != 0)
513 __db_err(dblp->dbenv,
514 "log_put: %s: %s", p, strerror(ret));
515 FREES(p);
516 return (ret);
520 * __log_name --
521 * Return the log name for a particular file.
523 * PUBLIC: int __log_name __P((DB_LOG *, int, char **));
526 __log_name(dblp, filenumber, namep)
527 DB_LOG *dblp;
528 char **namep;
529 int filenumber;
531 char name[sizeof(LFNAME) + 10];
533 (void)snprintf(name, sizeof(name), LFNAME, filenumber);
534 return (__db_appname(dblp->dbenv,
535 DB_APP_LOG, dblp->dir, name, 0, NULL, namep));