Update.
[glibc.git] / db2 / log / log_put.c
blob92d95633017513563dfe5d05ef98d9feeca8749a
1 /*-
2 * See the file LICENSE for redistribution information.
4 * Copyright (c) 1996, 1997
5 * Sleepycat Software. All rights reserved.
6 */
7 #include "config.h"
9 #ifndef lint
10 static const char sccsid[] = "@(#)log_put.c 10.20 (Sleepycat) 11/2/97";
11 #endif /* not lint */
13 #ifndef NO_SYSTEM_INCLUDES
14 #include <sys/types.h>
16 #include <errno.h>
17 #include <fcntl.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <time.h>
21 #include <unistd.h>
22 #endif
24 #include "db_int.h"
25 #include "shqueue.h"
26 #include "db_page.h"
27 #include "log.h"
28 #include "hash.h"
29 #include "common_ext.h"
31 static int __log_fill __P((DB_LOG *, void *, u_int32_t));
32 static int __log_flush __P((DB_LOG *, const DB_LSN *));
33 static int __log_newfd __P((DB_LOG *));
34 static int __log_putr __P((DB_LOG *, const DBT *, u_int32_t));
35 static int __log_write __P((DB_LOG *, void *, u_int32_t));
38 * log_put --
39 * Write a log record.
41 int
42 log_put(dblp, lsn, dbt, flags)
43 DB_LOG *dblp;
44 DB_LSN *lsn;
45 const DBT *dbt;
46 int flags;
48 int ret;
50 /* Validate arguments. */
51 #define OKFLAGS (DB_CHECKPOINT | DB_FLUSH)
52 if (flags != 0) {
53 if ((ret =
54 __db_fchk(dblp->dbenv, "log_put", flags, OKFLAGS)) != 0)
55 return (ret);
56 switch (flags) {
57 case DB_CHECKPOINT:
58 case DB_FLUSH:
59 case 0:
60 break;
61 default:
62 return (__db_ferr(dblp->dbenv, "log_put", 1));
66 LOCK_LOGREGION(dblp);
67 ret = __log_put(dblp, lsn, dbt, flags);
68 UNLOCK_LOGREGION(dblp);
69 return (ret);
73 * __log_put --
74 * Write a log record; internal version.
76 * PUBLIC: int __log_put __P((DB_LOG *, DB_LSN *, const DBT *, int));
78 int
79 __log_put(dblp, lsn, dbt, flags)
80 DB_LOG *dblp;
81 DB_LSN *lsn;
82 const DBT *dbt;
83 int flags;
85 DBT t;
86 DBT fid_dbt;
87 DB_LSN r_unused;
88 FNAME *fnp;
89 LOG *lp;
90 u_int32_t lastoff;
91 int ret;
93 lp = dblp->lp;
95 /* If this information won't fit in the file, swap files. */
96 if (lp->lsn.offset + sizeof(HDR) + dbt->size > lp->persist.lg_max) {
97 if (sizeof(HDR) +
98 sizeof(LOGP) + dbt->size > lp->persist.lg_max) {
99 __db_err(dblp->dbenv,
100 "log_put: record larger than maximum file size");
101 return (EINVAL);
104 /* Flush the log. */
105 if ((ret = __log_flush(dblp, NULL)) != 0)
106 return (ret);
109 * Save the last known offset from the previous file, we'll
110 * need it to initialize the persistent header information.
112 lastoff = lp->lsn.offset;
114 /* Point the current LSN to the new file. */
115 ++lp->lsn.file;
116 lp->lsn.offset = 0;
118 /* Reset the file write offset. */
119 lp->w_off = 0;
121 /* Reset the first-unwritten LSN for the buffer. */
122 lp->uw_lsn = lp->lsn;
123 } else
124 lastoff = 0;
127 * Insert persistent information as the first record in every file.
128 * Note that the previous length is wrong for the very first record
129 * of the log, but that's okay, we check for it during retrieval.
131 if (lp->lsn.offset == 0) {
132 t.data = &lp->persist;
133 t.size = sizeof(LOGP);
134 if ((ret = __log_putr(dblp,
135 &t, lastoff == 0 ? 0 : lastoff - lp->len)) != 0)
136 return (ret);
139 /* Initialize the LSN information returned to the user. */
140 lsn->file = lp->lsn.file;
141 lsn->offset = lp->lsn.offset;
143 /* Put out the user's record. */
144 if ((ret = __log_putr(dblp, dbt, lp->lsn.offset - lp->len)) != 0)
145 return (ret);
148 * On a checkpoint, we:
149 * Put out the checkpoint record (above).
150 * Save the LSN of the checkpoint in the shared region.
151 * Append the set of file name information into the log.
153 if (flags == DB_CHECKPOINT) {
154 lp->c_lsn = *lsn;
156 for (fnp = SH_TAILQ_FIRST(&dblp->lp->fq, __fname);
157 fnp != NULL; fnp = SH_TAILQ_NEXT(fnp, q, __fname)) {
158 t.data = R_ADDR(dblp, fnp->name_off);
159 t.size = strlen(t.data) + 1;
160 memset(&fid_dbt, 0, sizeof(fid_dbt));
161 fid_dbt.data = R_ADDR(dblp, fnp->fileid_off);
162 fid_dbt.size = DB_FILE_ID_LEN;
163 if ((ret = __log_register_log(dblp, NULL, &r_unused,
164 0, &t, &fid_dbt, fnp->id, fnp->s_type)) != 0)
165 return (ret);
170 * On a checkpoint or when flush is requested, we:
171 * Flush the current buffer contents to disk.
172 * Sync the log to disk.
174 if (flags == DB_FLUSH || flags == DB_CHECKPOINT)
175 if ((ret = __log_flush(dblp, NULL)) != 0)
176 return (ret);
179 * On a checkpoint, we:
180 * Save the time the checkpoint was written.
181 * Reset the bytes written since the last checkpoint.
183 if (flags == DB_CHECKPOINT) {
184 (void)time(&lp->chkpt);
185 lp->stat.st_wc_bytes = lp->stat.st_wc_mbytes = 0;
189 * When an application calls the log_flush routine, we need to figure
190 * out if the current buffer needs to be flushed. The problem is that
191 * if a record spans buffers, it's possible for the record continued
192 * in the current buffer to have begun in a previous buffer. Each time
193 * we write a buffer, we update the first-unwritten LSN to point to the
194 * first LSN after that written buffer. If we have a spanning record,
195 * correct that value to be the LSN that started it all, here.
197 if (lsn->offset < lp->w_off && lsn->offset + lp->len > lp->w_off)
198 lp->uw_lsn = *lsn;
200 return (0);
204 * __log_putr --
205 * Actually put a record into the log.
207 static int
208 __log_putr(dblp, dbt, prev)
209 DB_LOG *dblp;
210 const DBT *dbt;
211 u_int32_t prev;
213 HDR hdr;
214 LOG *lp;
215 int ret;
217 lp = dblp->lp;
220 * Initialize the header. If we just switched files, lsn.offset will
221 * be 0, and what we really want is the offset of the previous record
222 * in the previous file. Fortunately, prev holds the value we want.
224 hdr.prev = prev;
225 hdr.len = sizeof(HDR) + dbt->size;
226 hdr.cksum = __ham_func4(dbt->data, dbt->size);
228 if ((ret = __log_fill(dblp, &hdr, sizeof(HDR))) != 0)
229 return (ret);
230 lp->lsn.offset += sizeof(HDR);
232 if ((ret = __log_fill(dblp, dbt->data, dbt->size)) != 0)
233 return (ret);
234 lp->lsn.offset += dbt->size;
236 lp->len = sizeof(HDR) + dbt->size;
237 return (0);
241 * log_flush --
242 * Write all records less than or equal to the specified LSN.
245 log_flush(dblp, lsn)
246 DB_LOG *dblp;
247 const DB_LSN *lsn;
249 int ret;
251 LOCK_LOGREGION(dblp);
252 ret = __log_flush(dblp, lsn);
253 UNLOCK_LOGREGION(dblp);
254 return (ret);
258 * __log_flush --
259 * Write all records less than or equal to the specified LSN; internal
260 * version.
262 static int
263 __log_flush(dblp, lsn)
264 DB_LOG *dblp;
265 const DB_LSN *lsn;
267 DB_LSN t_lsn;
268 LOG *lp;
269 int ret;
271 ret = 0;
272 lp = dblp->lp;
275 * If no LSN specified, flush the entire log by setting the flush LSN
276 * to the last LSN written in the log. Otherwise, check that the LSN
277 * isn't a non-existent record for the log.
279 if (lsn == NULL) {
280 t_lsn.file = lp->lsn.file;
281 t_lsn.offset = lp->lsn.offset - lp->len;
282 lsn = &t_lsn;
283 } else
284 if (lsn->file > lp->lsn.file ||
285 (lsn->file == lp->lsn.file &&
286 lsn->offset > lp->lsn.offset - lp->len)) {
287 __db_err(dblp->dbenv,
288 "log_flush: LSN past current end-of-log");
289 return (EINVAL);
293 * If the LSN is less than the last-sync'd LSN, we're done. Note,
294 * the last-sync LSN saved in s_lsn is the LSN of the first byte
295 * that has not yet been written to disk, so the test is <, not <=.
297 if (lsn->file < lp->s_lsn.file ||
298 (lsn->file == lp->s_lsn.file && lsn->offset < lp->s_lsn.offset))
299 return (0);
302 * We may need to write the current buffer. We have to write the
303 * current buffer if the flush LSN is greater than or equal to the
304 * first-unwritten LSN (uw_lsn). If we write the buffer, then we
305 * update the first-unwritten LSN.
307 if (lp->b_off != 0 &&
308 lsn->file >= lp->uw_lsn.file && lsn->offset >= lp->uw_lsn.offset)
309 if ((ret = __log_write(dblp, lp->buf, lp->b_off)) != 0)
310 return (ret);
313 * It's possible that this thread may never have written to this log
314 * file. Acquire a file descriptor if we don't already have one.
316 if (dblp->lfname != dblp->lp->lsn.file)
317 if ((ret = __log_newfd(dblp)) != 0)
318 return (ret);
320 /* Sync all writes to disk. */
321 if ((ret = __db_fsync(dblp->lfd)) != 0)
322 return (ret);
323 ++lp->stat.st_scount;
326 * Set the last-synced LSN, the first LSN after the last record
327 * that we know is on disk.
329 lp->s_lsn = lp->uw_lsn;
331 return (0);
335 * __log_fill --
336 * Write information into the log.
338 static int
339 __log_fill(dblp, addr, len)
340 DB_LOG *dblp;
341 void *addr;
342 u_int32_t len;
344 LOG *lp;
345 u_int32_t nrec;
346 size_t nw, remain;
347 int ret;
349 /* Copy out the data. */
350 for (lp = dblp->lp; len > 0;) {
352 * If we're on a buffer boundary and the data is big enough,
353 * copy as many records as we can directly from the data.
355 if (lp->b_off == 0 && len >= sizeof(lp->buf)) {
356 nrec = len / sizeof(lp->buf);
357 if ((ret = __log_write(dblp,
358 addr, nrec * sizeof(lp->buf))) != 0)
359 return (ret);
360 addr = (u_int8_t *)addr + nrec * sizeof(lp->buf);
361 len -= nrec * sizeof(lp->buf);
362 continue;
365 /* Figure out how many bytes we can copy this time. */
366 remain = sizeof(lp->buf) - lp->b_off;
367 nw = remain > len ? len : remain;
368 memcpy(lp->buf + lp->b_off, addr, nw);
369 addr = (u_int8_t *)addr + nw;
370 len -= nw;
371 lp->b_off += nw;
373 /* If we fill the buffer, flush it. */
374 if (lp->b_off == sizeof(lp->buf) &&
375 (ret = __log_write(dblp, lp->buf, sizeof(lp->buf))) != 0)
376 return (ret);
378 return (0);
382 * __log_write --
383 * Write the log buffer to disk.
385 static int
386 __log_write(dblp, addr, len)
387 DB_LOG *dblp;
388 void *addr;
389 u_int32_t len;
391 LOG *lp;
392 ssize_t nw;
393 int ret;
396 * If we haven't opened the log file yet or the current one
397 * has changed, acquire a new log file.
399 lp = dblp->lp;
400 if (dblp->lfd == -1 || dblp->lfname != lp->lsn.file)
401 if ((ret = __log_newfd(dblp)) != 0)
402 return (ret);
405 * Seek to the offset in the file (someone may have written it
406 * since we last did).
408 if ((ret = __db_seek(dblp->lfd, 0, 0, lp->w_off, SEEK_SET)) != 0)
409 return (ret);
410 if ((ret = __db_write(dblp->lfd, addr, len, &nw)) != 0)
411 return (ret);
412 if (nw != (int32_t)len)
413 return (EIO);
416 * Reset the buffer offset, update the seek offset, and update the
417 * first-unwritten LSN.
419 lp->b_off = 0;
420 lp->w_off += len;
421 lp->uw_lsn.file = lp->lsn.file;
422 lp->uw_lsn.offset = lp->w_off;
424 /* Update written statistics. */
425 if ((lp->stat.st_w_bytes += len) >= MEGABYTE) {
426 lp->stat.st_w_bytes -= MEGABYTE;
427 ++lp->stat.st_w_mbytes;
429 if ((lp->stat.st_wc_bytes += len) >= MEGABYTE) {
430 lp->stat.st_wc_bytes -= MEGABYTE;
431 ++lp->stat.st_wc_mbytes;
433 ++lp->stat.st_wcount;
435 return (0);
439 * log_file --
440 * Map a DB_LSN to a file name.
443 log_file(dblp, lsn, namep, len)
444 DB_LOG *dblp;
445 const DB_LSN *lsn;
446 char *namep;
447 size_t len;
449 int ret;
450 char *p;
452 LOCK_LOGREGION(dblp);
453 ret = __log_name(dblp, lsn->file, &p);
454 UNLOCK_LOGREGION(dblp);
455 if (ret != 0)
456 return (ret);
458 /* Check to make sure there's enough room and copy the name. */
459 if (len < strlen(p)) {
460 *namep = '\0';
461 return (ENOMEM);
463 (void)strcpy(namep, p);
464 __db_free(p);
466 return (0);
470 * __log_newfd --
471 * Acquire a file descriptor for the current log file.
473 static int
474 __log_newfd(dblp)
475 DB_LOG *dblp;
477 int ret;
478 char *p;
480 /* Close any previous file descriptor. */
481 if (dblp->lfd != -1) {
482 (void)__db_close(dblp->lfd);
483 dblp->lfd = -1;
486 /* Get the path of the new file and open it. */
487 dblp->lfname = dblp->lp->lsn.file;
488 if ((ret = __log_name(dblp, dblp->lfname, &p)) != 0)
489 return (ret);
490 if ((ret = __db_open(p,
491 DB_CREATE | DB_SEQUENTIAL,
492 DB_CREATE | DB_SEQUENTIAL,
493 dblp->lp->persist.mode, &dblp->lfd)) != 0)
494 __db_err(dblp->dbenv,
495 "log_put: %s: %s", p, strerror(ret));
496 FREES(p);
497 return (ret);
501 * __log_name --
502 * Return the log name for a particular file.
504 * PUBLIC: int __log_name __P((DB_LOG *, int, char **));
507 __log_name(dblp, filenumber, namep)
508 DB_LOG *dblp;
509 char **namep;
510 int filenumber;
512 char name[sizeof(LFNAME) + 10];
514 (void)snprintf(name, sizeof(name), LFNAME, filenumber);
515 return (__db_appname(dblp->dbenv,
516 DB_APP_LOG, dblp->dir, name, NULL, namep));