2 * Copyright (C) 2004 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2001 Internet Software Consortium.
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: journal.c,v 1.77.2.2 2004/03/09 06:11:02 marka Exp $ */
26 #include <isc/stdio.h>
27 #include <isc/string.h>
30 #include <dns/compress.h>
32 #include <dns/dbiterator.h>
34 #include <dns/fixedname.h>
35 #include <dns/journal.h>
37 #include <dns/rdataset.h>
38 #include <dns/rdatasetiter.h>
39 #include <dns/result.h>
/*
 * When true, accept IXFR difference sequences where the
 * SOA serial number does not change (BIND 8 sends such
 * sequences).
 */
47 static isc_boolean_t bind8_compat
= ISC_TRUE
; /* XXX config */
49 /**************************************************************************/
51 * Miscellaneous utilities.
54 #define JOURNAL_COMMON_LOGARGS \
55 dns_lctx, DNS_LOGCATEGORY_GENERAL, DNS_LOGMODULE_JOURNAL
57 #define JOURNAL_DEBUG_LOGARGS(n) \
58 JOURNAL_COMMON_LOGARGS, ISC_LOG_DEBUG(n)
61 * It would be non-sensical (or at least obtuse) to use FAIL() with an
62 * ISC_R_SUCCESS code, but the test is there to keep the Solaris compiler
63 * from complaining about "end-of-loop code not reached".
66 do { result = (code); \
67 if (result != ISC_R_SUCCESS) goto failure; \
72 if (result != ISC_R_SUCCESS) goto failure; \
/*
 * Decode a 32-bit big-endian integer from the four bytes at 'p'.
 *
 * Each byte is widened to isc_uint32_t before it is shifted.  Shifting
 * the int-promoted byte left by 24 would overflow a signed int (which
 * is undefined behavior) whenever the top bit of p[0] is set.
 */
static inline isc_uint32_t
decode_uint32(unsigned char *p) {
	return (((isc_uint32_t)p[0] << 24) |
		((isc_uint32_t)p[1] << 16) |
		((isc_uint32_t)p[2] <<  8) |
		((isc_uint32_t)p[3] <<  0));
}
/*
 * Store 'val' at 'p' as four bytes in big-endian order
 * (most significant byte first).
 */
static inline void
encode_uint32(isc_uint32_t val, unsigned char *p) {
	unsigned int shift = 32;
	unsigned int i;

	for (i = 0; i < 4; i++) {
		shift -= 8;	/* 24, 16, 8, 0 */
		p[i] = (isc_uint8_t)(val >> shift);
	}
}
92 dns_db_createsoatuple(dns_db_t
*db
, dns_dbversion_t
*ver
, isc_mem_t
*mctx
,
93 dns_diffop_t op
, dns_difftuple_t
**tp
)
97 dns_rdataset_t rdataset
;
98 dns_rdata_t rdata
= DNS_RDATA_INIT
;
101 zonename
= dns_db_origin(db
);
104 result
= dns_db_findnode(db
, zonename
, ISC_FALSE
, &node
);
105 if (result
!= ISC_R_SUCCESS
)
108 dns_rdataset_init(&rdataset
);
109 result
= dns_db_findrdataset(db
, node
, ver
, dns_rdatatype_soa
, 0,
110 (isc_stdtime_t
)0, &rdataset
, NULL
);
111 if (result
!= ISC_R_SUCCESS
)
114 result
= dns_rdataset_first(&rdataset
);
115 if (result
!= ISC_R_SUCCESS
)
118 dns_rdataset_current(&rdataset
, &rdata
);
120 result
= dns_difftuple_create(mctx
, op
, zonename
, rdataset
.ttl
,
123 dns_rdataset_disassociate(&rdataset
);
124 dns_db_detachnode(db
, &node
);
125 return (ISC_R_SUCCESS
);
128 dns_db_detachnode(db
, &node
);
130 UNEXPECTED_ERROR(__FILE__
, __LINE__
, "missing SOA");
134 /**************************************************************************/
140 * A journal file consists of
142 * - A fixed-size header of type journal_rawheader_t.
144 * - The index. This is an unordered array of index entries
145 * of type journal_rawpos_t giving the locations
146 * of some arbitrary subset of the journal's addressable
147 * transactions. The index entries are used as hints to
148 * speed up the process of locating a transaction with a given
149 * serial number. Unused index entries have an "offset"
150 * field of zero. The size of the index can vary between
151 * journal files, but does not change during the lifetime
152 * of a file. The size can be zero.
154 * - The journal data. This consists of one or more transactions.
155 * Each transaction begins with a transaction header of type
156 * journal_rawxhdr_t. The transaction header is followed by a
157 * sequence of RRs, similar in structure to an IXFR difference
158 * sequence (RFC1995). That is, the pre-transaction SOA,
159 * zero or more other deleted RRs, the post-transaction SOA,
160 * and zero or more other added RRs. Unlike in IXFR, each RR
161 * is prefixed with a 32-bit length.
163 * The journal data part grows as new transactions are
164 * appended to the file. Only those transactions
165 * whose serial number is current-(2^31-1) to current
166 * are considered "addressable" and may be pointed
167 * to from the header or index. They may be preceded
168 * by old transactions that are no longer addressable,
169 * and they may be followed by transactions that were
170 * appended to the journal but never committed by updating
171 * the "end" position in the header. The latter will
172 * be overwritten when new transactions are added.
176 * On-disk representation of a "pointer" to a journal entry.
177 * These are used in the journal header to locate the beginning
178 * and end of the journal, and in the journal index to locate
179 * other transactions.
182 unsigned char serial
[4]; /* SOA serial before update. */
184 * XXXRTH Should offset be 8 bytes?
185 * XXXDCL ... probably, since isc_offset_t is 8 bytes on many OSs.
186 * XXXAG ... but we will not be able to seek >2G anyway on many
187 * platforms as long as we are using fseek() rather
190 unsigned char offset
[4]; /* Offset from beginning of file. */
194 * The on-disk representation of the journal header.
195 * All numbers are stored in big-endian order.
199 * The header is of a fixed size, with some spare room for future
202 #define JOURNAL_HEADER_SIZE 64 /* Bytes. */
206 /* File format version ID. */
207 unsigned char format
[16];
208 /* Position of the first addressable transaction */
209 journal_rawpos_t begin
;
210 /* Position of the next (yet nonexistent) transaction. */
211 journal_rawpos_t end
;
212 /* Number of index entries following the header. */
213 unsigned char index_size
[4];
215 /* Pad the header to a fixed size. */
216 unsigned char pad
[JOURNAL_HEADER_SIZE
];
217 } journal_rawheader_t
;
220 * The on-disk representation of the transaction header.
221 * There is one of these at the beginning of each transaction.
224 unsigned char size
[4]; /* In bytes, excluding header. */
225 unsigned char serial0
[4]; /* SOA serial before update. */
226 unsigned char serial1
[4]; /* SOA serial after update. */
230 * The on-disk representation of the RR header.
231 * There is one of these at the beginning of each RR.
234 unsigned char size
[4]; /* In bytes, excluding header. */
235 } journal_rawrrhdr_t
;
238 * The in-core representation of the journal header.
245 #define POS_VALID(pos) ((pos).offset != 0)
246 #define POS_INVALIDATE(pos) ((pos).offset = 0, (pos).serial = 0)
249 unsigned char format
[16];
252 isc_uint32_t index_size
;
256 * The in-core representation of the transaction header.
261 isc_uint32_t serial0
;
262 isc_uint32_t serial1
;
266 * The in-core representation of the RR header.
274 * Initial contents to store in the header of a newly created
277 * The header starts with the magic string ";BIND LOG V9\n"
278 * to identify the file as a BIND 9 journal file. An ASCII
279 * identification string is used rather than a binary magic
280 * number to be consistent with BIND 8 (BIND 8 journal files
281 * are ASCII text files).
284 static journal_header_t
285 initial_journal_header
= { ";BIND LOG V9\n", { 0, 0 }, { 0, 0 }, 0 };
287 #define JOURNAL_EMPTY(h) ((h)->begin.offset == (h)->end.offset)
290 JOURNAL_STATE_INVALID
,
293 JOURNAL_STATE_TRANSACTION
297 unsigned int magic
; /* JOUR */
298 isc_mem_t
*mctx
; /* Memory context */
299 journal_state_t state
;
300 const char *filename
; /* Journal file name */
301 FILE * fp
; /* File handle */
302 isc_offset_t offset
; /* Current file offset */
303 journal_header_t header
; /* In-core journal header */
304 unsigned char *rawindex
; /* In-core buffer for journal
305 index in on-disk format */
306 journal_pos_t
*index
; /* In-core journal index */
308 /* Current transaction state (when writing). */
310 unsigned int n_soa
; /* Number of SOAs seen */
311 journal_pos_t pos
[2]; /* Begin/end position */
314 /* Iteration state (when reading). */
316 /* These define the part of the journal we iterate over. */
317 journal_pos_t bpos
; /* Position before first, */
318 journal_pos_t epos
; /* and after last
320 /* The rest is iterator state. */
321 isc_uint32_t current_serial
; /* Current SOA serial */
322 isc_buffer_t source
; /* Data from disk */
323 isc_buffer_t target
; /* Data from _fromwire check */
324 dns_decompress_t dctx
; /* Dummy decompression ctx */
325 dns_name_t name
; /* Current domain name */
326 dns_rdata_t rdata
; /* Current rdata */
327 isc_uint32_t ttl
; /* Current TTL */
328 unsigned int xsize
; /* Size of transaction data */
329 unsigned int xpos
; /* Current position in it */
330 isc_result_t result
; /* Result of last call */
334 #define DNS_JOURNAL_MAGIC ISC_MAGIC('J', 'O', 'U', 'R')
335 #define DNS_JOURNAL_VALID(t) ISC_MAGIC_VALID(t, DNS_JOURNAL_MAGIC)
338 journal_pos_decode(journal_rawpos_t
*raw
, journal_pos_t
*cooked
) {
339 cooked
->serial
= decode_uint32(raw
->serial
);
340 cooked
->offset
= decode_uint32(raw
->offset
);
344 journal_pos_encode(journal_rawpos_t
*raw
, journal_pos_t
*cooked
) {
345 encode_uint32(cooked
->serial
, raw
->serial
);
346 encode_uint32(cooked
->offset
, raw
->offset
);
350 journal_header_decode(journal_rawheader_t
*raw
, journal_header_t
*cooked
) {
351 INSIST(sizeof(cooked
->format
) == sizeof(raw
->h
.format
));
352 memcpy(cooked
->format
, raw
->h
.format
, sizeof(cooked
->format
));
353 journal_pos_decode(&raw
->h
.begin
, &cooked
->begin
);
354 journal_pos_decode(&raw
->h
.end
, &cooked
->end
);
355 cooked
->index_size
= decode_uint32(raw
->h
.index_size
);
359 journal_header_encode(journal_header_t
*cooked
, journal_rawheader_t
*raw
) {
360 INSIST(sizeof(cooked
->format
) == sizeof(raw
->h
.format
));
361 memset(raw
->pad
, 0, sizeof(raw
->pad
));
362 memcpy(raw
->h
.format
, cooked
->format
, sizeof(raw
->h
.format
));
363 journal_pos_encode(&raw
->h
.begin
, &cooked
->begin
);
364 journal_pos_encode(&raw
->h
.end
, &cooked
->end
);
365 encode_uint32(cooked
->index_size
, raw
->h
.index_size
);
369 * Journal file I/O subroutines, with error checking and reporting.
372 journal_seek(dns_journal_t
*j
, isc_uint32_t offset
) {
374 result
= isc_stdio_seek(j
->fp
, (long)offset
, SEEK_SET
);
375 if (result
!= ISC_R_SUCCESS
) {
376 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
377 "%s: seek: %s", j
->filename
,
378 isc_result_totext(result
));
379 return (ISC_R_UNEXPECTED
);
382 return (ISC_R_SUCCESS
);
386 journal_read(dns_journal_t
*j
, void *mem
, size_t nbytes
) {
389 result
= isc_stdio_read(mem
, 1, nbytes
, j
->fp
, NULL
);
390 if (result
!= ISC_R_SUCCESS
) {
391 if (result
== ISC_R_EOF
)
392 return (ISC_R_NOMORE
);
393 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
395 j
->filename
, isc_result_totext(result
));
396 return (ISC_R_UNEXPECTED
);
399 return (ISC_R_SUCCESS
);
403 journal_write(dns_journal_t
*j
, void *mem
, size_t nbytes
) {
406 result
= isc_stdio_write(mem
, 1, nbytes
, j
->fp
, NULL
);
407 if (result
!= ISC_R_SUCCESS
) {
408 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
410 j
->filename
, isc_result_totext(result
));
411 return (ISC_R_UNEXPECTED
);
414 return (ISC_R_SUCCESS
);
418 journal_fsync(dns_journal_t
*j
) {
420 result
= isc_stdio_flush(j
->fp
);
421 if (result
!= ISC_R_SUCCESS
) {
422 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
424 j
->filename
, isc_result_totext(result
));
425 return (ISC_R_UNEXPECTED
);
427 result
= isc_stdio_sync(j
->fp
);
428 if (result
!= ISC_R_SUCCESS
) {
429 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
431 j
->filename
, isc_result_totext(result
));
432 return (ISC_R_UNEXPECTED
);
434 return (ISC_R_SUCCESS
);
438 * Read/write a transaction header at the current file position.
442 journal_read_xhdr(dns_journal_t
*j
, journal_xhdr_t
*xhdr
) {
443 journal_rawxhdr_t raw
;
445 result
= journal_read(j
, &raw
, sizeof(raw
));
446 if (result
!= ISC_R_SUCCESS
)
448 xhdr
->size
= decode_uint32(raw
.size
);
449 xhdr
->serial0
= decode_uint32(raw
.serial0
);
450 xhdr
->serial1
= decode_uint32(raw
.serial1
);
451 return (ISC_R_SUCCESS
);
455 journal_write_xhdr(dns_journal_t
*j
, isc_uint32_t size
,
456 isc_uint32_t serial0
, isc_uint32_t serial1
)
458 journal_rawxhdr_t raw
;
459 encode_uint32(size
, raw
.size
);
460 encode_uint32(serial0
, raw
.serial0
);
461 encode_uint32(serial1
, raw
.serial1
);
462 return (journal_write(j
, &raw
, sizeof(raw
)));
467 * Read an RR header at the current file position.
471 journal_read_rrhdr(dns_journal_t
*j
, journal_rrhdr_t
*rrhdr
) {
472 journal_rawrrhdr_t raw
;
474 result
= journal_read(j
, &raw
, sizeof(raw
));
475 if (result
!= ISC_R_SUCCESS
)
477 rrhdr
->size
= decode_uint32(raw
.size
);
478 return (ISC_R_SUCCESS
);
482 journal_file_create(isc_mem_t
*mctx
, const char *filename
) {
485 journal_header_t header
;
486 journal_rawheader_t rawheader
;
487 int index_size
= 56; /* XXX configurable */
489 void *mem
; /* Memory for temporary index image. */
491 INSIST(sizeof(journal_rawheader_t
) == JOURNAL_HEADER_SIZE
);
493 result
= isc_stdio_open(filename
, "wb", &fp
);
494 if (result
!= ISC_R_SUCCESS
) {
495 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
497 filename
, isc_result_totext(result
));
498 return (ISC_R_UNEXPECTED
);
501 header
= initial_journal_header
;
502 header
.index_size
= index_size
;
503 journal_header_encode(&header
, &rawheader
);
505 size
= sizeof(journal_rawheader_t
) +
506 index_size
* sizeof(journal_rawpos_t
);
508 mem
= isc_mem_get(mctx
, size
);
510 (void)isc_stdio_close(fp
);
511 (void)isc_file_remove(filename
);
512 return (ISC_R_NOMEMORY
);
514 memset(mem
, 0, size
);
515 memcpy(mem
, &rawheader
, sizeof(rawheader
));
517 result
= isc_stdio_write(mem
, 1, (size_t) size
, fp
, NULL
);
518 if (result
!= ISC_R_SUCCESS
) {
519 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
521 filename
, isc_result_totext(result
));
522 (void)isc_stdio_close(fp
);
523 (void)isc_file_remove(filename
);
524 isc_mem_put(mctx
, mem
, size
);
525 return (ISC_R_UNEXPECTED
);
527 isc_mem_put(mctx
, mem
, size
);
529 result
= isc_stdio_close(fp
);
530 if (result
!= ISC_R_SUCCESS
) {
531 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
533 filename
, isc_result_totext(result
));
534 (void)isc_file_remove(filename
);
535 return (ISC_R_UNEXPECTED
);
538 return (ISC_R_SUCCESS
);
543 dns_journal_open(isc_mem_t
*mctx
, const char *filename
, isc_boolean_t write
,
544 dns_journal_t
**journalp
) {
547 journal_rawheader_t rawheader
;
550 INSIST(journalp
!= NULL
&& *journalp
== NULL
);
551 j
= isc_mem_get(mctx
, sizeof(*j
));
553 return (ISC_R_NOMEMORY
);
556 j
->state
= JOURNAL_STATE_INVALID
;
558 j
->filename
= filename
;
562 result
= isc_stdio_open(j
->filename
, write
? "rb+" : "rb", &fp
);
564 if (result
== ISC_R_FILENOTFOUND
) {
566 isc_log_write(JOURNAL_COMMON_LOGARGS
,
568 "journal file %s does not exist, "
571 CHECK(journal_file_create(mctx
, filename
));
575 result
= isc_stdio_open(j
->filename
, "rb+", &fp
);
577 FAIL(ISC_R_NOTFOUND
);
580 if (result
!= ISC_R_SUCCESS
) {
581 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
583 j
->filename
, isc_result_totext(result
));
584 FAIL(ISC_R_UNEXPECTED
);
590 * Set magic early so that seek/read can succeed.
592 j
->magic
= DNS_JOURNAL_MAGIC
;
594 CHECK(journal_seek(j
, 0));
595 CHECK(journal_read(j
, &rawheader
, sizeof(rawheader
)));
597 if (memcmp(rawheader
.h
.format
, initial_journal_header
.format
,
598 sizeof(initial_journal_header
.format
)) != 0) {
599 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
600 "%s: journal format not recognized",
602 FAIL(ISC_R_UNEXPECTED
);
604 journal_header_decode(&rawheader
, &j
->header
);
607 * If there is an index, read the raw index into a dynamically
608 * allocated buffer and then convert it into a cooked index.
610 if (j
->header
.index_size
!= 0) {
612 unsigned int rawbytes
;
615 rawbytes
= j
->header
.index_size
* sizeof(journal_rawpos_t
);
616 j
->rawindex
= isc_mem_get(mctx
, rawbytes
);
617 if (j
->rawindex
== NULL
)
618 FAIL(ISC_R_NOMEMORY
);
620 CHECK(journal_read(j
, j
->rawindex
, rawbytes
));
622 j
->index
= isc_mem_get(mctx
, j
->header
.index_size
*
623 sizeof(journal_pos_t
));
624 if (j
->index
== NULL
)
625 FAIL(ISC_R_NOMEMORY
);
628 for (i
= 0; i
< j
->header
.index_size
; i
++) {
629 j
->index
[i
].serial
= decode_uint32(p
);
631 j
->index
[i
].offset
= decode_uint32(p
);
634 INSIST(p
== j
->rawindex
+ rawbytes
);
636 j
->offset
= -1; /* Invalid, must seek explicitly. */
639 * Initialize the iterator.
641 dns_name_init(&j
->it
.name
, NULL
);
642 dns_rdata_init(&j
->it
.rdata
);
645 * Set up empty initial buffers for uncheched and checked
646 * wire format RR data. They will be reallocated
649 isc_buffer_init(&j
->it
.source
, NULL
, 0);
650 isc_buffer_init(&j
->it
.target
, NULL
, 0);
651 dns_decompress_init(&j
->it
.dctx
, -1, DNS_DECOMPRESS_NONE
);
654 write
? JOURNAL_STATE_WRITE
: JOURNAL_STATE_READ
;
657 return (ISC_R_SUCCESS
);
661 if (j
->index
!= NULL
) {
662 isc_mem_put(j
->mctx
, j
->index
, j
->header
.index_size
*
663 sizeof(journal_rawpos_t
));
667 (void)isc_stdio_close(j
->fp
);
668 isc_mem_put(j
->mctx
, j
, sizeof(*j
));
673 * A comparison function defining the sorting order for
674 * entries in the IXFR-style journal file.
676 * The IXFR format requires that deletions are sorted before
677 * additions, and within either one, SOA records are sorted
680 * Also sort the non-SOA records by type as a courtesy to the
681 * server receiving the IXFR - it may help reduce the amount of
682 * rdataset merging it has to do.
685 ixfr_order(const void *av
, const void *bv
) {
686 dns_difftuple_t
const * const *ap
= av
;
687 dns_difftuple_t
const * const *bp
= bv
;
688 dns_difftuple_t
const *a
= *ap
;
689 dns_difftuple_t
const *b
= *bp
;
692 r
= (b
->op
== DNS_DIFFOP_DEL
) - (a
->op
== DNS_DIFFOP_DEL
);
696 r
= (b
->rdata
.type
== dns_rdatatype_soa
) -
697 (a
->rdata
.type
== dns_rdatatype_soa
);
701 r
= (a
->rdata
.type
- b
->rdata
.type
);
706 * Advance '*pos' to the next journal transaction.
709 * *pos refers to a valid journal transaction.
712 * When ISC_R_SUCCESS is returned,
713 * *pos refers to the next journal transaction.
718 * ISC_R_NOMORE *pos pointed at the last transaction
719 * Other results due to file errors are possible.
722 journal_next(dns_journal_t
*j
, journal_pos_t
*pos
) {
725 REQUIRE(DNS_JOURNAL_VALID(j
));
727 result
= journal_seek(j
, pos
->offset
);
728 if (result
!= ISC_R_SUCCESS
)
732 * Read the header of the current transaction.
733 * This will return ISC_R_NOMORE if we are at EOF.
735 result
= journal_read_xhdr(j
, &xhdr
);
736 if (result
!= ISC_R_SUCCESS
)
740 * Check serial number consistency.
742 if (xhdr
.serial0
!= pos
->serial
) {
743 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
744 "%s: journal file corrupt: "
745 "expected serial %u, got %u",
746 j
->filename
, pos
->serial
, xhdr
.serial0
);
747 return (ISC_R_UNEXPECTED
);
751 * Check for offset wraparound.
753 if ((isc_offset_t
)(pos
->offset
+ sizeof(journal_rawxhdr_t
) + xhdr
.size
)
755 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
756 "%s: offset too large", j
->filename
);
757 return (ISC_R_UNEXPECTED
);
760 pos
->offset
+= sizeof(journal_rawxhdr_t
) + xhdr
.size
;
761 pos
->serial
= xhdr
.serial1
;
762 return (ISC_R_SUCCESS
);
766 * If the index of the journal 'j' contains an entry "better"
767 * than '*best_guess', replace '*best_guess' with it.
769 * "Better" means having a serial number closer to 'serial'
770 * but not greater than 'serial'.
773 index_find(dns_journal_t
*j
, isc_uint32_t serial
, journal_pos_t
*best_guess
) {
775 if (j
->index
== NULL
)
777 for (i
= 0; i
< j
->header
.index_size
; i
++) {
778 if (POS_VALID(j
->index
[i
]) &&
779 DNS_SERIAL_GE(serial
, j
->index
[i
].serial
) &&
780 DNS_SERIAL_GT(j
->index
[i
].serial
, best_guess
->serial
))
781 *best_guess
= j
->index
[i
];
786 * Add a new index entry. If there is no room, make room by removing
787 * the odd-numbered entries and compacting the others into the first
788 * half of the index. This decimates old index entries exponentially
789 * over time, so that the index always contains a much larger fraction
790 * of recent serial numbers than of old ones. This is deliberate -
791 * most index searches are for outgoing IXFR, and IXFR tends to request
792 * recent versions more often than old ones.
795 index_add(dns_journal_t
*j
, journal_pos_t
*pos
) {
797 if (j
->index
== NULL
)
800 * Search for a vacant position.
802 for (i
= 0; i
< j
->header
.index_size
; i
++) {
803 if (! POS_VALID(j
->index
[i
]))
806 if (i
== j
->header
.index_size
) {
809 * Found no vacant position. Make some room.
811 for (i
= 0; i
< j
->header
.index_size
; i
+= 2) {
812 j
->index
[k
++] = j
->index
[i
];
814 i
= k
; /* 'i' identifies the first vacant position. */
815 while (k
< j
->header
.index_size
) {
816 POS_INVALIDATE(j
->index
[k
]);
820 INSIST(i
< j
->header
.index_size
);
821 INSIST(! POS_VALID(j
->index
[i
]));
824 * Store the new index entry.
830 * Invalidate any existing index entries that could become
831 * ambiguous when a new transaction with number 'serial' is added.
834 index_invalidate(dns_journal_t
*j
, isc_uint32_t serial
) {
836 if (j
->index
== NULL
)
838 for (i
= 0; i
< j
->header
.index_size
; i
++) {
839 if (! DNS_SERIAL_GT(serial
, j
->index
[i
].serial
))
840 POS_INVALIDATE(j
->index
[i
]);
845 * Try to find a transaction with initial serial number 'serial'
846 * in the journal 'j'.
848 * If found, store its position at '*pos' and return ISC_R_SUCCESS.
850 * If 'serial' is current (= the ending serial number of the
851 * last transaction in the journal), set '*pos' to
852 * the position immediately following the last transaction and
853 * return ISC_R_SUCCESS.
855 * If 'serial' is within the range of addressable serial numbers
856 * covered by the journal but that particular serial number is missing
857 * (from the journal, not just from the index), return ISC_R_NOTFOUND.
859 * If 'serial' is outside the range of addressable serial numbers
860 * covered by the journal, return ISC_R_RANGE.
864 journal_find(dns_journal_t
*j
, isc_uint32_t serial
, journal_pos_t
*pos
) {
866 journal_pos_t current_pos
;
867 REQUIRE(DNS_JOURNAL_VALID(j
));
869 if (DNS_SERIAL_GT(j
->header
.begin
.serial
, serial
))
870 return (ISC_R_RANGE
);
871 if (DNS_SERIAL_GT(serial
, j
->header
.end
.serial
))
872 return (ISC_R_RANGE
);
873 if (serial
== j
->header
.end
.serial
) {
874 *pos
= j
->header
.end
;
875 return (ISC_R_SUCCESS
);
878 current_pos
= j
->header
.begin
;
879 index_find(j
, serial
, ¤t_pos
);
881 while (current_pos
.serial
!= serial
) {
882 if (DNS_SERIAL_GT(current_pos
.serial
, serial
))
883 return (ISC_R_NOTFOUND
);
884 result
= journal_next(j
, ¤t_pos
);
885 if (result
!= ISC_R_SUCCESS
)
889 return (ISC_R_SUCCESS
);
893 dns_journal_begin_transaction(dns_journal_t
*j
) {
896 journal_rawxhdr_t hdr
;
898 REQUIRE(DNS_JOURNAL_VALID(j
));
899 REQUIRE(j
->state
== JOURNAL_STATE_WRITE
);
902 * Find the file offset where the new transaction should
903 * be written, and seek there.
905 if (JOURNAL_EMPTY(&j
->header
)) {
906 offset
= sizeof(journal_rawheader_t
) +
907 j
->header
.index_size
* sizeof(journal_rawpos_t
);
909 offset
= j
->header
.end
.offset
;
911 j
->x
.pos
[0].offset
= offset
;
912 j
->x
.pos
[1].offset
= offset
; /* Initial value, will be incremented. */
915 CHECK(journal_seek(j
, offset
));
918 * Write a dummy transaction header of all zeroes to reserve
919 * space. It will be filled in when the transaction is
922 memset(&hdr
, 0, sizeof(hdr
));
923 CHECK(journal_write(j
, &hdr
, sizeof(hdr
)));
924 j
->x
.pos
[1].offset
= j
->offset
;
926 j
->state
= JOURNAL_STATE_TRANSACTION
;
927 result
= ISC_R_SUCCESS
;
933 dns_journal_writediff(dns_journal_t
*j
, dns_diff_t
*diff
) {
941 REQUIRE(DNS_DIFF_VALID(diff
));
942 REQUIRE(j
->state
== JOURNAL_STATE_TRANSACTION
);
944 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "writing to journal");
945 dns_diff_print(diff
, NULL
);
948 * Pass 1: determine the buffer size needed, and
949 * keep track of SOA serial numbers.
952 for (t
= ISC_LIST_HEAD(diff
->tuples
); t
!= NULL
;
953 t
= ISC_LIST_NEXT(t
, link
))
955 if (t
->rdata
.type
== dns_rdatatype_soa
) {
957 j
->x
.pos
[j
->x
.n_soa
].serial
=
958 dns_soa_getserial(&t
->rdata
);
961 size
+= sizeof(journal_rawrrhdr_t
);
962 size
+= t
->name
.length
; /* XXX should have access macro? */
964 size
+= t
->rdata
.length
;
967 mem
= isc_mem_get(j
->mctx
, size
);
969 return (ISC_R_NOMEMORY
);
971 isc_buffer_init(&buffer
, mem
, size
);
974 * Pass 2. Write RRs to buffer.
976 for (t
= ISC_LIST_HEAD(diff
->tuples
); t
!= NULL
;
977 t
= ISC_LIST_NEXT(t
, link
))
980 * Write the RR header.
982 isc_buffer_putuint32(&buffer
, t
->name
.length
+ 10 +
985 * Write the owner name, RR header, and RR data.
987 isc_buffer_putmem(&buffer
, t
->name
.ndata
, t
->name
.length
);
988 isc_buffer_putuint16(&buffer
, t
->rdata
.type
);
989 isc_buffer_putuint16(&buffer
, t
->rdata
.rdclass
);
990 isc_buffer_putuint32(&buffer
, t
->ttl
);
991 INSIST(t
->rdata
.length
< 65536);
992 isc_buffer_putuint16(&buffer
, (isc_uint16_t
)t
->rdata
.length
);
993 INSIST(isc_buffer_availablelength(&buffer
) >= t
->rdata
.length
);
994 isc_buffer_putmem(&buffer
, t
->rdata
.data
, t
->rdata
.length
);
997 isc_buffer_usedregion(&buffer
, &used
);
998 INSIST(used
.length
== size
);
1000 j
->x
.pos
[1].offset
+= used
.length
;
1003 * Write the buffer contents to the journal file.
1005 CHECK(journal_write(j
, used
.base
, used
.length
));
1007 result
= ISC_R_SUCCESS
;
1011 isc_mem_put(j
->mctx
, mem
, size
);
1017 dns_journal_commit(dns_journal_t
*j
) {
1018 isc_result_t result
;
1019 journal_rawheader_t rawheader
;
1021 REQUIRE(DNS_JOURNAL_VALID(j
));
1022 REQUIRE(j
->state
== JOURNAL_STATE_TRANSACTION
);
1025 * Perform some basic consistency checks.
1027 if (j
->x
.n_soa
!= 2) {
1028 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
1029 "malformed transaction: %d SOAs",
1031 return (ISC_R_UNEXPECTED
);
1033 if (! (DNS_SERIAL_GT(j
->x
.pos
[1].serial
, j
->x
.pos
[0].serial
) ||
1035 j
->x
.pos
[1].serial
== j
->x
.pos
[0].serial
)))
1037 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
1038 "malformed transaction: serial number "
1040 return (ISC_R_UNEXPECTED
);
1042 if (! JOURNAL_EMPTY(&j
->header
)) {
1043 if (j
->x
.pos
[0].serial
!= j
->header
.end
.serial
) {
1044 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
1045 "malformed transaction: "
1046 "%s last serial %u != "
1047 "transaction first serial %u",
1049 j
->header
.end
.serial
,
1050 j
->x
.pos
[0].serial
);
1051 return (ISC_R_UNEXPECTED
);
1056 * Some old journal entries may become non-addressable
1057 * when we increment the current serial number. Purge them
1058 * by stepping header.begin forward to the first addressable
1059 * transaction. Also purge them from the index.
1061 if (! JOURNAL_EMPTY(&j
->header
)) {
1062 while (! DNS_SERIAL_GT(j
->x
.pos
[1].serial
,
1063 j
->header
.begin
.serial
)) {
1064 CHECK(journal_next(j
, &j
->header
.begin
));
1066 index_invalidate(j
, j
->x
.pos
[1].serial
);
1069 if (DNS_SERIAL_GT(last_dumped_serial
, j
->x
.pos
[1].serial
)) {
1075 * Commit the transaction data to stable storage.
1077 CHECK(journal_fsync(j
));
1080 * Update the transaction header.
1082 CHECK(journal_seek(j
, j
->x
.pos
[0].offset
));
1083 CHECK(journal_write_xhdr(j
, (j
->x
.pos
[1].offset
- j
->x
.pos
[0].offset
) -
1084 sizeof(journal_rawxhdr_t
),
1085 j
->x
.pos
[0].serial
, j
->x
.pos
[1].serial
));
1088 * Update the journal header.
1090 if (JOURNAL_EMPTY(&j
->header
)) {
1091 j
->header
.begin
= j
->x
.pos
[0];
1093 j
->header
.end
= j
->x
.pos
[1];
1094 journal_header_encode(&j
->header
, &rawheader
);
1095 CHECK(journal_seek(j
, 0));
1096 CHECK(journal_write(j
, &rawheader
, sizeof(rawheader
)));
1101 index_add(j
, &j
->x
.pos
[0]);
1104 * Convert the index into on-disk format and write
1107 if (j
->header
.index_size
!= 0) {
1110 unsigned int rawbytes
;
1112 rawbytes
= j
->header
.index_size
* sizeof(journal_rawpos_t
);
1115 for (i
= 0; i
< j
->header
.index_size
; i
++) {
1116 encode_uint32(j
->index
[i
].serial
, p
);
1118 encode_uint32(j
->index
[i
].offset
, p
);
1121 INSIST(p
== j
->rawindex
+ rawbytes
);
1123 CHECK(journal_write(j
, j
->rawindex
, rawbytes
));
1127 * Commit the header to stable storage.
1129 CHECK(journal_fsync(j
));
1132 * We no longer have a transaction open.
1134 j
->state
= JOURNAL_STATE_WRITE
;
1136 result
= ISC_R_SUCCESS
;
1143 dns_journal_write_transaction(dns_journal_t
*j
, dns_diff_t
*diff
) {
1144 isc_result_t result
;
1145 CHECK(dns_diff_sort(diff
, ixfr_order
));
1146 CHECK(dns_journal_begin_transaction(j
));
1147 CHECK(dns_journal_writediff(j
, diff
));
1148 CHECK(dns_journal_commit(j
));
1149 result
= ISC_R_SUCCESS
;
1155 dns_journal_destroy(dns_journal_t
**journalp
) {
1156 dns_journal_t
*j
= *journalp
;
1157 REQUIRE(DNS_JOURNAL_VALID(j
));
1159 j
->it
.result
= ISC_R_FAILURE
;
1160 dns_name_invalidate(&j
->it
.name
);
1161 dns_decompress_invalidate(&j
->it
.dctx
);
1162 if (j
->rawindex
!= NULL
)
1163 isc_mem_put(j
->mctx
, j
->rawindex
, j
->header
.index_size
*
1164 sizeof(journal_rawpos_t
));
1165 if (j
->index
!= NULL
)
1166 isc_mem_put(j
->mctx
, j
->index
, j
->header
.index_size
*
1167 sizeof(journal_pos_t
));
1168 if (j
->it
.target
.base
!= NULL
)
1169 isc_mem_put(j
->mctx
, j
->it
.target
.base
, j
->it
.target
.length
);
1170 if (j
->it
.source
.base
!= NULL
)
1171 isc_mem_put(j
->mctx
, j
->it
.source
.base
, j
->it
.source
.length
);
1174 (void)isc_stdio_close(j
->fp
);
1176 isc_mem_put(j
->mctx
, j
, sizeof(*j
));
1181 * Roll the open journal 'j' into the database 'db'.
1182 * A new database version will be created.
1185 /* XXX Share code with incoming IXFR? */
1188 roll_forward(dns_journal_t
*j
, dns_db_t
*db
) {
1189 isc_buffer_t source
; /* Transaction data from disk */
1190 isc_buffer_t target
; /* Ditto after _fromwire check */
1191 isc_uint32_t db_serial
; /* Database SOA serial */
1192 isc_uint32_t end_serial
; /* Last journal SOA serial */
1193 isc_result_t result
;
1194 dns_dbversion_t
*ver
= NULL
;
1197 unsigned int n_soa
= 0;
1198 unsigned int n_put
= 0;
1200 REQUIRE(DNS_JOURNAL_VALID(j
));
1201 REQUIRE(DNS_DB_VALID(db
));
1203 dns_diff_init(j
->mctx
, &diff
);
1206 * Set up empty initial buffers for uncheched and checked
1207 * wire format transaction data. They will be reallocated
1210 isc_buffer_init(&source
, NULL
, 0);
1211 isc_buffer_init(&target
, NULL
, 0);
1214 * Create the new database version.
1216 CHECK(dns_db_newversion(db
, &ver
));
1219 * Get the current database SOA serial number.
1221 CHECK(dns_db_getsoaserial(db
, ver
, &db_serial
));
1224 * Locate a journal entry for the current database serial.
1226 CHECK(journal_find(j
, db_serial
, &pos
));
1228 * XXX do more drastic things, like marking zone stale,
1232 * XXXRTH The zone code should probably mark the zone as bad and
1233 * scream loudly into the log if this is a dynamic update
1234 * log reply that failed.
1237 end_serial
= dns_journal_last_serial(j
);
1238 if (db_serial
== end_serial
)
1239 CHECK(DNS_R_UPTODATE
);
1241 CHECK(dns_journal_iter_init(j
, db_serial
, end_serial
));
1243 for (result
= dns_journal_first_rr(j
);
1244 result
== ISC_R_SUCCESS
;
1245 result
= dns_journal_next_rr(j
))
1250 dns_difftuple_t
*tuple
= NULL
;
1254 dns_journal_current_rr(j
, &name
, &ttl
, &rdata
);
1256 if (rdata
->type
== dns_rdatatype_soa
)
1262 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
1263 "%s: journal file corrupt: missing "
1264 "initial SOA", j
->filename
);
1265 FAIL(ISC_R_UNEXPECTED
);
1267 CHECK(dns_difftuple_create(diff
.mctx
, n_soa
== 1 ?
1268 DNS_DIFFOP_DEL
: DNS_DIFFOP_ADD
,
1269 name
, ttl
, rdata
, &tuple
));
1270 dns_diff_append(&diff
, &tuple
);
1272 if (++n_put
> 100) {
1273 isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
1274 "applying diff to database");
1275 dns_diff_print(&diff
, NULL
);
1276 CHECK(dns_diff_apply(&diff
, db
, ver
));
1277 dns_diff_clear(&diff
);
1281 if (result
== ISC_R_NOMORE
)
1282 result
= ISC_R_SUCCESS
;
1286 isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
1287 "applying final diff to database");
1288 dns_diff_print(&diff
, NULL
);
1289 CHECK(dns_diff_apply(&diff
, db
, ver
));
1290 dns_diff_clear(&diff
);
1295 dns_db_closeversion(db
, &ver
, result
== ISC_R_SUCCESS
?
1296 ISC_TRUE
: ISC_FALSE
);
1298 if (source
.base
!= NULL
)
1299 isc_mem_put(j
->mctx
, source
.base
, source
.length
);
1300 if (target
.base
!= NULL
)
1301 isc_mem_put(j
->mctx
, target
.base
, target
.length
);
1303 dns_diff_clear(&diff
);
1309 dns_journal_rollforward(isc_mem_t
*mctx
, dns_db_t
*db
, const char *filename
) {
1311 isc_result_t result
;
1313 REQUIRE(DNS_DB_VALID(db
));
1314 REQUIRE(filename
!= NULL
);
1317 result
= dns_journal_open(mctx
, filename
, ISC_FALSE
, &j
);
1318 if (result
== ISC_R_NOTFOUND
) {
1319 isc_log_write(JOURNAL_DEBUG_LOGARGS(3),
1320 "no journal file, but that's OK");
1321 return (DNS_R_NOJOURNAL
);
1323 if (result
!= ISC_R_SUCCESS
)
1325 if (JOURNAL_EMPTY(&j
->header
))
1326 result
= DNS_R_UPTODATE
;
1328 result
= roll_forward(j
, db
);
1330 dns_journal_destroy(&j
);
1336 dns_journal_print(isc_mem_t
*mctx
, const char *filename
, FILE *file
) {
1338 isc_buffer_t source
; /* Transaction data from disk */
1339 isc_buffer_t target
; /* Ditto after _fromwire check */
1340 isc_uint32_t start_serial
; /* Database SOA serial */
1341 isc_uint32_t end_serial
; /* Last journal SOA serial */
1342 isc_result_t result
;
1344 unsigned int n_soa
= 0;
1345 unsigned int n_put
= 0;
1347 REQUIRE(filename
!= NULL
);
1350 result
= dns_journal_open(mctx
, filename
, ISC_FALSE
, &j
);
1351 if (result
== ISC_R_NOTFOUND
) {
1352 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no journal file");
1353 return (DNS_R_NOJOURNAL
);
1356 if (result
!= ISC_R_SUCCESS
) {
1357 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
1358 "journal open failure");
1362 dns_diff_init(j
->mctx
, &diff
);
1365 * Set up empty initial buffers for uncheched and checked
1366 * wire format transaction data. They will be reallocated
1369 isc_buffer_init(&source
, NULL
, 0);
1370 isc_buffer_init(&target
, NULL
, 0);
1372 start_serial
= dns_journal_first_serial(j
);
1373 end_serial
= dns_journal_last_serial(j
);
1375 CHECK(dns_journal_iter_init(j
, start_serial
, end_serial
));
1377 for (result
= dns_journal_first_rr(j
);
1378 result
== ISC_R_SUCCESS
;
1379 result
= dns_journal_next_rr(j
))
1384 dns_difftuple_t
*tuple
= NULL
;
1388 dns_journal_current_rr(j
, &name
, &ttl
, &rdata
);
1390 if (rdata
->type
== dns_rdatatype_soa
)
1396 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
1397 "%s: journal file corrupt: missing "
1398 "initial SOA", j
->filename
);
1399 FAIL(ISC_R_UNEXPECTED
);
1401 CHECK(dns_difftuple_create(diff
.mctx
, n_soa
== 1 ?
1402 DNS_DIFFOP_DEL
: DNS_DIFFOP_ADD
,
1403 name
, ttl
, rdata
, &tuple
));
1404 dns_diff_append(&diff
, &tuple
);
1406 if (++n_put
> 100) {
1407 result
= dns_diff_print(&diff
, file
);
1408 dns_diff_clear(&diff
);
1410 if (result
!= ISC_R_SUCCESS
)
1414 if (result
== ISC_R_NOMORE
)
1415 result
= ISC_R_SUCCESS
;
1419 result
= dns_diff_print(&diff
, file
);
1420 dns_diff_clear(&diff
);
1425 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
1426 "%s: cannot print: journal file corrupt", j
->filename
);
1429 if (source
.base
!= NULL
)
1430 isc_mem_put(j
->mctx
, source
.base
, source
.length
);
1431 if (target
.base
!= NULL
)
1432 isc_mem_put(j
->mctx
, target
.base
, target
.length
);
1434 dns_diff_clear(&diff
);
1435 dns_journal_destroy(&j
);
1440 /**************************************************************************/
1442 * Miscellaneous accessors.
1444 isc_uint32_t
dns_journal_first_serial(dns_journal_t
*j
) {
1445 return (j
->header
.begin
.serial
);
1448 isc_uint32_t
dns_journal_last_serial(dns_journal_t
*j
) {
1449 return (j
->header
.end
.serial
);
1452 /**************************************************************************/
1454 * Iteration support.
1456 * When serving an outgoing IXFR, we transmit a part of the journal starting
1457 * at the serial number in the IXFR request and ending at the serial
1458 * number that is current when the IXFR request arrives. The ending
1459 * serial number is not necessarily at the end of the journal:
1460 * the journal may grow while the IXFR is in progress, but we stop
1461 * when we reach the serial number that was current when the IXFR started.
1464 static isc_result_t
read_one_rr(dns_journal_t
*j
);
1467 * Make sure the buffer 'b' is has at least 'size' bytes
1468 * allocated, and clear it.
1471 * Either b->base is NULL, or it points to b->length bytes of memory
1472 * previously allocated by isc_mem_get().
1476 size_buffer(isc_mem_t
*mctx
, isc_buffer_t
*b
, unsigned size
) {
1477 if (b
->length
< size
) {
1478 void *mem
= isc_mem_get(mctx
, size
);
1480 return (ISC_R_NOMEMORY
);
1481 if (b
->base
!= NULL
)
1482 isc_mem_put(mctx
, b
->base
, b
->length
);
1486 isc_buffer_clear(b
);
1487 return (ISC_R_SUCCESS
);
1491 dns_journal_iter_init(dns_journal_t
*j
,
1492 isc_uint32_t begin_serial
, isc_uint32_t end_serial
)
1494 isc_result_t result
;
1496 CHECK(journal_find(j
, begin_serial
, &j
->it
.bpos
));
1497 INSIST(j
->it
.bpos
.serial
== begin_serial
);
1499 CHECK(journal_find(j
, end_serial
, &j
->it
.epos
));
1500 INSIST(j
->it
.epos
.serial
== end_serial
);
1502 result
= ISC_R_SUCCESS
;
1504 j
->it
.result
= result
;
1505 return (j
->it
.result
);
1510 dns_journal_first_rr(dns_journal_t
*j
) {
1511 isc_result_t result
;
1514 * Seek to the beginning of the first transaction we are
1517 CHECK(journal_seek(j
, j
->it
.bpos
.offset
));
1518 j
->it
.current_serial
= j
->it
.bpos
.serial
;
1520 j
->it
.xsize
= 0; /* We have no transaction data yet... */
1521 j
->it
.xpos
= 0; /* ...and haven't used any of it. */
1523 return (read_one_rr(j
));
1530 read_one_rr(dns_journal_t
*j
) {
1531 isc_result_t result
;
1533 dns_rdatatype_t rdtype
;
1534 dns_rdataclass_t rdclass
;
1537 journal_xhdr_t xhdr
;
1538 journal_rrhdr_t rrhdr
;
1540 INSIST(j
->offset
<= j
->it
.epos
.offset
);
1541 if (j
->offset
== j
->it
.epos
.offset
)
1542 return (ISC_R_NOMORE
);
1543 if (j
->it
.xpos
== j
->it
.xsize
) {
1545 * We are at a transaction boundary.
1546 * Read another transaction header.
1548 CHECK(journal_read_xhdr(j
, &xhdr
));
1549 if (xhdr
.size
== 0) {
1550 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
1551 "journal corrupt: empty transaction");
1552 FAIL(ISC_R_UNEXPECTED
);
1554 if (xhdr
.serial0
!= j
->it
.current_serial
) {
1555 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
1556 "%s: journal file corrupt: "
1557 "expected serial %u, got %u",
1559 j
->it
.current_serial
, xhdr
.serial0
);
1560 FAIL(ISC_R_UNEXPECTED
);
1562 j
->it
.xsize
= xhdr
.size
;
1568 result
= journal_read_rrhdr(j
, &rrhdr
);
1570 * Perform a sanity check on the journal RR size.
1571 * The smallest possible RR has a 1-byte owner name
1572 * and a 10-byte header. The largest possible
1573 * RR has 65535 bytes of data, a header, and a maximum-
1574 * size owner name, well below 70 k total.
1576 if (rrhdr
.size
< 1+10 || rrhdr
.size
> 70000) {
1577 isc_log_write(JOURNAL_COMMON_LOGARGS
, ISC_LOG_ERROR
,
1578 "%s: journal corrupt: impossible RR size "
1579 "(%d bytes)", j
->filename
, rrhdr
.size
);
1580 FAIL(ISC_R_UNEXPECTED
);
1583 CHECK(size_buffer(j
->mctx
, &j
->it
.source
, rrhdr
.size
));
1584 CHECK(journal_read(j
, j
->it
.source
.base
, rrhdr
.size
));
1585 isc_buffer_add(&j
->it
.source
, rrhdr
.size
);
1588 * The target buffer is made the same size
1589 * as the source buffer, with the assumption that when
1590 * no compression in present, the output of dns_*_fromwire()
1591 * is no larger than the input.
1593 CHECK(size_buffer(j
->mctx
, &j
->it
.target
, rrhdr
.size
));
1596 * Parse the owner name. We don't know where it
1597 * ends yet, so we make the entire "remaining"
1598 * part of the buffer "active".
1600 isc_buffer_setactive(&j
->it
.source
,
1601 j
->it
.source
.used
- j
->it
.source
.current
);
1602 CHECK(dns_name_fromwire(&j
->it
.name
, &j
->it
.source
,
1603 &j
->it
.dctx
, ISC_FALSE
, &j
->it
.target
));
1606 * Check that the RR header is there, and parse it.
1608 if (isc_buffer_remaininglength(&j
->it
.source
) < 10)
1609 FAIL(DNS_R_FORMERR
);
1611 rdtype
= isc_buffer_getuint16(&j
->it
.source
);
1612 rdclass
= isc_buffer_getuint16(&j
->it
.source
);
1613 ttl
= isc_buffer_getuint32(&j
->it
.source
);
1614 rdlen
= isc_buffer_getuint16(&j
->it
.source
);
1619 isc_buffer_setactive(&j
->it
.source
, rdlen
);
1620 dns_rdata_reset(&j
->it
.rdata
);
1621 CHECK(dns_rdata_fromwire(&j
->it
.rdata
, rdclass
,
1622 rdtype
, &j
->it
.source
, &j
->it
.dctx
,
1623 ISC_FALSE
, &j
->it
.target
));
1626 j
->it
.xpos
+= sizeof(journal_rawrrhdr_t
) + rrhdr
.size
;
1627 if (rdtype
== dns_rdatatype_soa
) {
1628 /* XXX could do additional consistency checks here */
1629 j
->it
.current_serial
= dns_soa_getserial(&j
->it
.rdata
);
1632 result
= ISC_R_SUCCESS
;
1635 j
->it
.result
= result
;
1640 dns_journal_next_rr(dns_journal_t
*j
) {
1641 j
->it
.result
= read_one_rr(j
);
1642 return (j
->it
.result
);
1646 dns_journal_current_rr(dns_journal_t
*j
, dns_name_t
**name
, isc_uint32_t
*ttl
,
1647 dns_rdata_t
**rdata
)
1649 REQUIRE(j
->it
.result
== ISC_R_SUCCESS
);
1650 *name
= &j
->it
.name
;
1652 *rdata
= &j
->it
.rdata
;
1655 /**************************************************************************/
1657 * Generating diffs from databases
1661 * Construct a diff containing all the RRs at the current name of the
1662 * database iterator 'dbit' in database 'db', version 'ver'.
1663 * Set '*name' to the current name, and append the diff to 'diff'.
1664 * All new tuples will have the operation 'op'.
1666 * Requires: 'name' must have buffer large enough to hold the name.
1667 * Typically, a dns_fixedname_t would be used.
1670 get_name_diff(dns_db_t
*db
, dns_dbversion_t
*ver
, isc_stdtime_t now
,
1671 dns_dbiterator_t
*dbit
, dns_name_t
*name
, dns_diffop_t op
,
1674 isc_result_t result
;
1675 dns_dbnode_t
*node
= NULL
;
1676 dns_rdatasetiter_t
*rdsiter
= NULL
;
1677 dns_difftuple_t
*tuple
= NULL
;
1679 result
= dns_dbiterator_current(dbit
, &node
, name
);
1680 if (result
!= ISC_R_SUCCESS
)
1683 result
= dns_db_allrdatasets(db
, node
, ver
, now
, &rdsiter
);
1684 if (result
!= ISC_R_SUCCESS
)
1687 for (result
= dns_rdatasetiter_first(rdsiter
);
1688 result
== ISC_R_SUCCESS
;
1689 result
= dns_rdatasetiter_next(rdsiter
))
1691 dns_rdataset_t rdataset
;
1693 dns_rdataset_init(&rdataset
);
1694 dns_rdatasetiter_current(rdsiter
, &rdataset
);
1696 for (result
= dns_rdataset_first(&rdataset
);
1697 result
== ISC_R_SUCCESS
;
1698 result
= dns_rdataset_next(&rdataset
))
1700 dns_rdata_t rdata
= DNS_RDATA_INIT
;
1701 dns_rdataset_current(&rdataset
, &rdata
);
1702 result
= dns_difftuple_create(diff
->mctx
, op
, name
,
1703 rdataset
.ttl
, &rdata
,
1705 if (result
!= ISC_R_SUCCESS
) {
1706 dns_rdataset_disassociate(&rdataset
);
1707 goto cleanup_iterator
;
1709 dns_diff_append(diff
, &tuple
);
1711 dns_rdataset_disassociate(&rdataset
);
1712 if (result
!= ISC_R_NOMORE
)
1713 goto cleanup_iterator
;
1715 if (result
!= ISC_R_NOMORE
)
1716 goto cleanup_iterator
;
1718 result
= ISC_R_SUCCESS
;
1721 dns_rdatasetiter_destroy(&rdsiter
);
1724 dns_db_detachnode(db
, &node
);
1730 * Comparison function for use by dns_diff_subtract when sorting
1731 * the diffs to be subtracted. The sort keys are the rdata type
1732 * and the rdata itself. The owner name is ignored, because
1733 * it is known to be the same for all tuples.
1736 rdata_order(const void *av
, const void *bv
) {
1737 dns_difftuple_t
const * const *ap
= av
;
1738 dns_difftuple_t
const * const *bp
= bv
;
1739 dns_difftuple_t
const *a
= *ap
;
1740 dns_difftuple_t
const *b
= *bp
;
1742 r
= (b
->rdata
.type
- a
->rdata
.type
);
1745 r
= dns_rdata_compare(&a
->rdata
, &b
->rdata
);
1750 dns_diff_subtract(dns_diff_t diff
[2], dns_diff_t
*r
) {
1751 isc_result_t result
;
1752 dns_difftuple_t
*p
[2];
1754 CHECK(dns_diff_sort(&diff
[0], rdata_order
));
1755 CHECK(dns_diff_sort(&diff
[1], rdata_order
));
1758 p
[0] = ISC_LIST_HEAD(diff
[0].tuples
);
1759 p
[1] = ISC_LIST_HEAD(diff
[1].tuples
);
1760 if (p
[0] == NULL
&& p
[1] == NULL
)
1763 for (i
= 0; i
< 2; i
++)
1764 if (p
[!i
] == NULL
) {
1765 ISC_LIST_UNLINK(diff
[i
].tuples
, p
[i
], link
);
1766 ISC_LIST_APPEND(r
->tuples
, p
[i
], link
);
1769 t
= rdata_order(&p
[0], &p
[1]);
1771 ISC_LIST_UNLINK(diff
[0].tuples
, p
[0], link
);
1772 ISC_LIST_APPEND(r
->tuples
, p
[0], link
);
1776 ISC_LIST_UNLINK(diff
[1].tuples
, p
[1], link
);
1777 ISC_LIST_APPEND(r
->tuples
, p
[1], link
);
1782 * Identical RRs in both databases; skip them both.
1784 for (i
= 0; i
< 2; i
++) {
1785 ISC_LIST_UNLINK(diff
[i
].tuples
, p
[i
], link
);
1786 dns_difftuple_free(&p
[i
]);
1790 result
= ISC_R_SUCCESS
;
1796 * Compare the databases 'dba' and 'dbb' and generate a journal
1797 * entry containing the changes to make 'dba' from 'dbb' (note
1798 * the order). This journal entry will consist of a single,
1799 * possibly very large transaction.
1803 dns_db_diff(isc_mem_t
*mctx
,
1804 dns_db_t
*dba
, dns_dbversion_t
*dbvera
,
1805 dns_db_t
*dbb
, dns_dbversion_t
*dbverb
,
1806 const char *journal_filename
)
1809 dns_dbversion_t
*ver
[2];
1810 dns_dbiterator_t
*dbit
[2] = { NULL
, NULL
};
1811 isc_boolean_t have
[2] = { ISC_FALSE
, ISC_FALSE
};
1812 dns_fixedname_t fixname
[2];
1813 isc_result_t result
, itresult
[2];
1814 dns_diff_t diff
[2], resultdiff
;
1816 dns_journal_t
*journal
= NULL
;
1818 db
[0] = dba
, db
[1] = dbb
;
1819 ver
[0] = dbvera
, ver
[1] = dbverb
;
1821 dns_diff_init(mctx
, &diff
[0]);
1822 dns_diff_init(mctx
, &diff
[1]);
1823 dns_diff_init(mctx
, &resultdiff
);
1825 dns_fixedname_init(&fixname
[0]);
1826 dns_fixedname_init(&fixname
[1]);
1828 CHECK(dns_journal_open(mctx
, journal_filename
, ISC_TRUE
, &journal
));
1830 CHECK(dns_db_createiterator(db
[0], ISC_FALSE
, &dbit
[0]));
1831 CHECK(dns_db_createiterator(db
[1], ISC_FALSE
, &dbit
[1]));
1833 itresult
[0] = dns_dbiterator_first(dbit
[0]);
1834 itresult
[1] = dns_dbiterator_first(dbit
[1]);
1837 for (i
= 0; i
< 2; i
++) {
1838 if (! have
[i
] && itresult
[i
] == ISC_R_SUCCESS
) {
1839 CHECK(get_name_diff(db
[i
], ver
[i
], 0, dbit
[i
],
1840 dns_fixedname_name(&fixname
[i
]),
1845 itresult
[i
] = dns_dbiterator_next(dbit
[i
]);
1850 if (! have
[0] && ! have
[1]) {
1851 INSIST(ISC_LIST_EMPTY(diff
[0].tuples
));
1852 INSIST(ISC_LIST_EMPTY(diff
[1].tuples
));
1856 for (i
= 0; i
< 2; i
++) {
1858 ISC_LIST_APPENDLIST(resultdiff
.tuples
,
1859 diff
[i
].tuples
, link
);
1860 INSIST(ISC_LIST_EMPTY(diff
[i
].tuples
));
1861 have
[i
] = ISC_FALSE
;
1866 t
= dns_name_compare(dns_fixedname_name(&fixname
[0]),
1867 dns_fixedname_name(&fixname
[1]));
1869 ISC_LIST_APPENDLIST(resultdiff
.tuples
,
1870 diff
[0].tuples
, link
);
1871 INSIST(ISC_LIST_EMPTY(diff
[0].tuples
));
1872 have
[0] = ISC_FALSE
;
1876 ISC_LIST_APPENDLIST(resultdiff
.tuples
,
1877 diff
[1].tuples
, link
);
1878 INSIST(ISC_LIST_EMPTY(diff
[1].tuples
));
1879 have
[1] = ISC_FALSE
;
1883 CHECK(dns_diff_subtract(diff
, &resultdiff
));
1884 INSIST(ISC_LIST_EMPTY(diff
[0].tuples
));
1885 INSIST(ISC_LIST_EMPTY(diff
[1].tuples
));
1886 have
[0] = have
[1] = ISC_FALSE
;
1889 if (itresult
[0] != ISC_R_NOMORE
)
1891 if (itresult
[1] != ISC_R_NOMORE
)
1894 if (ISC_LIST_EMPTY(resultdiff
.tuples
)) {
1895 isc_log_write(JOURNAL_DEBUG_LOGARGS(3), "no changes");
1897 CHECK(dns_journal_write_transaction(journal
, &resultdiff
));
1899 INSIST(ISC_LIST_EMPTY(diff
[0].tuples
));
1900 INSIST(ISC_LIST_EMPTY(diff
[1].tuples
));
1901 dns_diff_clear(&resultdiff
);
1904 dns_dbiterator_destroy(&dbit
[0]);
1905 dns_dbiterator_destroy(&dbit
[1]);
1906 dns_journal_destroy(&journal
);