Fix whitespace irregularities in code
[xapian.git] / xapian-core / backends / glass / glass_table.h
bloba495794d7f878086f919b993488ae65ced5a4be4
1 /** @file glass_table.h
2 * @brief Btree implementation
3 */
4 /* Copyright 1999,2000,2001 BrightStation PLC
5 * Copyright 2002,2003,2004,2005,2006,2007,2008,2009,2010,2012,2013,2014,2015,2016 Olly Betts
6 * Copyright 2008 Lemur Consulting Ltd
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License as
10 * published by the Free Software Foundation; either version 2 of the
11 * License, or (at your option) any later version.
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21 * USA
24 #ifndef OM_HGUARD_GLASS_TABLE_H
25 #define OM_HGUARD_GLASS_TABLE_H
27 #include <xapian/constants.h>
28 #include <xapian/error.h>
30 #include "glass_freelist.h"
31 #include "glass_cursor.h"
32 #include "glass_defs.h"
34 #include "io_utils.h"
35 #include "noreturn.h"
36 #include "omassert.h"
37 #include "str.h"
38 #include "stringutils.h"
39 #include "wordaccess.h"
41 #include "common/compression_stream.h"
43 #include <algorithm>
44 #include <string>
46 namespace Glass {
48 /** Even for items of at maximum size, it must be possible to get this number of
49 * items in a block */
50 const size_t BLOCK_CAPACITY = 4;
52 /** The largest possible value of a key_len.
54 * This gives the upper limit of the size of a key that may be stored in the
55 * B-tree.
57 #define GLASS_BTREE_MAX_KEY_LEN 255
59 // FIXME: This named constant probably isn't used everywhere it should be...
60 const int BYTES_PER_BLOCK_NUMBER = 4;
62 /* The B-tree blocks have a number of internal lengths and offsets held in 1, 2
63 or 4 bytes. To make the coding a little clearer,
64 we use for
65 ------ ---
66 K1 the 1 byte length of key
67 I2 the 2 byte length of an item (key-tag pair)
68 D2 the 2 byte offset to the item from the directory
69 X2 the 2 byte component counter that ends each key
72 const int K1 = 1;
73 const int I2 = 2;
74 const int D2 = 2;
75 const int X2 = 2;
77 /* and when getting or setting them, we use these methods of the various
78 * *Item* classes: */
80 // getD(p, c)
81 // setD(p, c, x)
82 // getI()
83 // setI(x)
84 // getX(p, c)
85 // setX(p, c, x)
87 /* if you've been reading the comments from the top, the next four procedures
88 will not cause any headaches.
90 Recall that a leaf item has this form:
92 i k x
93 | | |
94 I K key X tag
95 ←K→
96 <---SIZE---->
97 <---I--->
99 Except that X is omitted for the first component of a tag (there is a flag
100 bit in the upper bits of I which indicates these).
102 item_of(p, c) returns i, the address of the item at block address p,
103 directory offset c,
105 component_of(p, c) returns the number marked 'x' above,
107 last_component(p, c) returns true if this is a final component.
110 inline uint4 REVISION(const byte * b) { return aligned_read4(b); }
111 inline int GET_LEVEL(const byte * b) { return b[4]; }
112 inline int MAX_FREE(const byte * b) { return unaligned_read2(b + 5); }
113 inline int TOTAL_FREE(const byte * b) { return unaligned_read2(b + 7); }
114 inline int DIR_END(const byte * b) { return unaligned_read2(b + 9); }
115 const int DIR_START = 11;
117 inline void SET_REVISION(byte * b, uint4 rev) { aligned_write4(b, rev); }
118 inline void SET_LEVEL(byte * b, int x) { AssertRel(x,<,256); b[4] = x; }
119 inline void SET_MAX_FREE(byte * b, int x) { unaligned_write2(b + 5, x); }
120 inline void SET_TOTAL_FREE(byte * b, int x) { unaligned_write2(b + 7, x); }
121 inline void SET_DIR_END(byte * b, int x) { unaligned_write2(b + 9, x); }
123 // The item size is stored in 2 bytes, but the top bit is used to store a flag for
124 // "is the tag data compressed" and the next two bits are used to flag if this is the
125 // first and/or last item for this tag.
126 const int I_COMPRESSED_BIT = 0x80;
127 const int I_LAST_BIT = 0x40;
128 const int I_FIRST_BIT = 0x20;
130 const int I_MASK = (I_COMPRESSED_BIT|I_LAST_BIT|I_FIRST_BIT);
132 const int ITEM_SIZE_MASK = (0xffff &~ (I_MASK << 8));
133 const size_t MAX_ITEM_SIZE = (ITEM_SIZE_MASK + 3);
135 /** Freelist blocks have their level set to LEVEL_FREELIST. */
136 const int LEVEL_FREELIST = 254;
138 class RootInfo;
140 class Key {
141 const byte *p;
142 public:
143 explicit Key(const byte * p_) : p(p_) { }
144 const byte * get_address() const { return p; }
145 const byte * data() const { return p + K1; }
146 void read(std::string * key) const {
147 key->assign(reinterpret_cast<const char *>(p + K1), length());
149 int length() const {
150 return p[0];
152 char operator[](size_t i) const {
153 AssertRel(i,<,(size_t)length());
154 return p[i + K1];
158 // LeafItem_wr wants to be "LeafItem with non-const p and more methods" - we can't
159 // achieve that nicely with inheritance, so we use a template base class.
160 template <class T> class LeafItem_base {
161 protected:
162 T p;
163 int get_key_len() const { return p[I2]; }
164 int getD(const byte * q, int c) const {
165 AssertRel(c, >=, DIR_START);
166 AssertRel(c, <, 65535);
167 Assert((c & 1) == 1);
168 return unaligned_read2(q + c);
170 int getI() const { return unaligned_read2(p); }
171 int getX(const byte * q, int c) const { return unaligned_read2(q + c); }
172 public:
173 /* LeafItem from block address and offset to item pointer */
174 LeafItem_base(T p_, int c) : p(p_ + getD(p_, c)) { }
175 LeafItem_base(T p_) : p(p_) { }
176 T get_address() const { return p; }
177 /** SIZE in diagram above. */
178 int size() const {
179 return (getI() & ITEM_SIZE_MASK) + 3;
181 bool get_compressed() const { return *p & I_COMPRESSED_BIT; }
182 bool first_component() const { return *p & I_FIRST_BIT; }
183 bool last_component() const { return *p & I_LAST_BIT; }
184 int component_of() const {
185 if (first_component()) return 1;
186 return getX(p, get_key_len() + I2 + K1);
188 Key key() const { return Key(p + I2); }
189 void append_chunk(std::string * tag) const {
190 // Offset to the start of the tag data.
191 int cd = get_key_len() + I2 + K1;
192 if (!first_component()) cd += X2;
193 // Number of bytes to extract from current component.
194 int l = size() - cd;
195 const char * chunk = reinterpret_cast<const char *>(p + cd);
196 tag->append(chunk, l);
198 bool decompress_chunk(CompressionStream& comp_stream, string& tag) const {
199 // Offset to the start of the tag data.
200 int cd = get_key_len() + I2 + K1;
201 if (!first_component()) cd += X2;
202 // Number of bytes to extract from current component.
203 int l = size() - cd;
204 const char * chunk = reinterpret_cast<const char *>(p + cd);
205 return comp_stream.decompress_chunk(chunk, l, tag);
209 class LeafItem : public LeafItem_base<const byte *> {
210 public:
211 /* LeafItem from block address and offset to item pointer */
212 LeafItem(const byte * p_, int c) : LeafItem_base<const byte *>(p_, c) { }
213 LeafItem(const byte * p_) : LeafItem_base<const byte *>(p_) { }
216 class LeafItem_wr : public LeafItem_base<byte *> {
217 void set_key_len(int x) {
218 AssertRel(x, >=, 0);
219 AssertRel(x, <=, GLASS_BTREE_MAX_KEY_LEN);
220 p[I2] = x;
222 void setI(int x) { unaligned_write2(p, x); }
223 void setX(byte * q, int c, int x) { unaligned_write2(q + c, x); }
224 public:
225 /* LeafItem_wr from block address and offset to item pointer */
226 LeafItem_wr(byte * p_, int c) : LeafItem_base<byte *>(p_, c) { }
227 LeafItem_wr(byte * p_) : LeafItem_base<byte *>(p_) { }
228 void set_component_of(int i) {
229 AssertRel(i,>,1);
230 *p &=~ I_FIRST_BIT;
231 setX(p, get_key_len() + I2 + K1, i);
233 void set_size(int new_size) {
234 AssertRel(new_size,>=,3);
235 int I = new_size - 3;
236 // We should never be able to pass too large a size here, but don't
237 // corrupt the database if this somehow happens.
238 if (rare(I &~ ITEM_SIZE_MASK)) throw Xapian::DatabaseError("item too large!");
239 setI(I);
241 void form_key(const std::string & key_) {
242 std::string::size_type key_len = key_.length();
243 if (key_len > GLASS_BTREE_MAX_KEY_LEN) {
244 // We check term length when a term is added to a document but
245 // glass doubles zero bytes, so this can still happen for terms
246 // which contain one or more zero bytes.
247 std::string msg("Key too long: length was ");
248 msg += str(key_len);
249 msg += " bytes, maximum length of a key is "
250 STRINGIZE(GLASS_BTREE_MAX_KEY_LEN) " bytes";
251 throw Xapian::InvalidArgumentError(msg);
254 set_key_len(key_len);
255 std::memmove(p + I2 + K1, key_.data(), key_len);
256 *p |= I_FIRST_BIT;
258 // FIXME passing cd here is icky
259 void set_tag(int cd, const char *start, int len, bool compressed, int i, int m) {
260 std::memmove(p + cd, start, len);
261 set_size(cd + len);
262 if (compressed) *p |= I_COMPRESSED_BIT;
263 if (i == m) *p |= I_LAST_BIT;
264 if (i == 1) {
265 *p |= I_FIRST_BIT;
266 } else {
267 set_component_of(i);
270 void fake_root_item() {
271 set_key_len(0); // null key length
272 set_size(I2 + K1); // length of the item
273 *p |= I_FIRST_BIT|I_LAST_BIT;
275 operator const LeafItem() const { return LeafItem(p); }
276 static void setD(byte * q, int c, int x) {
277 AssertRel(c, >=, DIR_START);
278 AssertRel(c, <, 65535);
279 Assert((c & 1) == 1);
280 unaligned_write2(q + c, x);
284 /* A branch item has this form:
288 tag K key X
289 ←B→ ←K→
290 <--SIZE--->
292 B = BYTES_PER_BLOCK_NUMBER
294 We can't omit X here, as we've nowhere to store the first and last bit
295 flags which we have in leaf items.
298 // BItem_wr wants to be "BItem with non-const p and more methods" - we can't
299 // achieve that nicely with inheritance, so we use a template base class.
300 template <class T> class BItem_base {
301 protected:
302 T p;
303 int get_key_len() const { return p[BYTES_PER_BLOCK_NUMBER]; }
304 int getD(const byte * q, int c) const {
305 AssertRel(c, >=, DIR_START);
306 AssertRel(c, <, 65535);
307 Assert((c & 1) == 1);
308 return unaligned_read2(q + c);
310 int getX(const byte * q, int c) const { return unaligned_read2(q + c); }
311 public:
312 /* BItem from block address and offset to item pointer */
313 BItem_base(T p_, int c) : p(p_ + getD(p_, c)) { }
314 BItem_base(T p_) : p(p_) { }
315 T get_address() const { return p; }
316 /** SIZE in diagram above. */
317 int size() const {
318 return get_key_len() + K1 + X2 + BYTES_PER_BLOCK_NUMBER;
320 Key key() const { return Key(p + BYTES_PER_BLOCK_NUMBER); }
321 /** Get this item's tag as a block number (this block should not be at
322 * level 0).
324 uint4 block_given_by() const {
325 return unaligned_read4(p);
327 int component_of() const {
328 return getX(p, get_key_len() + BYTES_PER_BLOCK_NUMBER + K1);
332 class BItem : public BItem_base<const byte *> {
333 public:
334 /* BItem from block address and offset to item pointer */
335 BItem(const byte * p_, int c) : BItem_base<const byte *>(p_, c) { }
336 BItem(const byte * p_) : BItem_base<const byte *>(p_) { }
339 class BItem_wr : public BItem_base<byte *> {
340 void set_key_len(int x) {
341 AssertRel(x, >=, 0);
342 AssertRel(x, <, GLASS_BTREE_MAX_KEY_LEN);
343 p[BYTES_PER_BLOCK_NUMBER] = x;
345 void setX(byte * q, int c, int x) { unaligned_write2(q + c, x); }
346 public:
347 /* BItem_wr from block address and offset to item pointer */
348 BItem_wr(byte * p_, int c) : BItem_base<byte *>(p_, c) { }
349 BItem_wr(byte * p_) : BItem_base<byte *>(p_) { }
350 void set_component_of(int i) {
351 setX(p, get_key_len() + BYTES_PER_BLOCK_NUMBER + K1, i);
353 void set_key_and_block(Key newkey, uint4 n) {
354 int len = newkey.length() + K1 + X2;
355 // Copy the key size, main part of the key and the count part.
356 std::memcpy(p + BYTES_PER_BLOCK_NUMBER, newkey.get_address(), len);
357 // Set tag contents to block number
358 set_block_given_by(n);
360 // Takes size as we may be truncating newkey.
361 void set_truncated_key_and_block(Key newkey, int new_comp,
362 int truncate_size, uint4 n) {
363 int i = truncate_size;
364 AssertRel(i,<=,newkey.length());
365 // Key size
366 set_key_len(i);
367 // Copy the main part of the key, possibly truncating.
368 std::memcpy(p + BYTES_PER_BLOCK_NUMBER + K1, newkey.get_address() + K1, i);
369 // Set the component count.
370 setX(p, BYTES_PER_BLOCK_NUMBER + K1 + i, new_comp);
371 // Set tag contents to block number
372 set_block_given_by(n);
375 /** Set this item's tag to point to block n (this block should not be at
376 * level 0).
378 void set_block_given_by(uint4 n) {
379 unaligned_write4(p, n);
381 /** Form an item with a null key and with block number n in the tag.
383 void form_null_key(uint4 n) {
384 set_block_given_by(n);
385 set_key_len(0); /* null key */
386 set_component_of(0);
388 operator const BItem() const { return BItem(p); }
389 static void setD(byte * q, int c, int x) {
390 AssertRel(c, >=, DIR_START);
391 AssertRel(c, <, 65535);
392 Assert((c & 1) == 1);
393 unaligned_write2(q + c, x);
397 // Allow for BTREE_CURSOR_LEVELS levels in the B-tree.
398 // With 10, overflow is practically impossible
399 // FIXME: but we want it to be completely impossible...
400 const int BTREE_CURSOR_LEVELS = 10;
404 using Glass::RootInfo;
406 class GlassChanges;
408 /** Class managing a Btree table in a Glass database.
410 * A table is a store holding a set of key/tag pairs.
412 * A key is used to access a block of data in a glass table.
414 * Keys are of limited length.
416 * Keys may not be empty (each Btree has a special empty key for internal use).
418 * A tag is a piece of data associated with a given key. The contents
419 * of the tag are opaque to the Btree.
421 * Tags may be of arbitrary length (the Btree imposes a very large limit).
422 * Note though that they will be loaded into memory in their entirety, so
423 * should not be permitted to grow without bound in normal usage.
425 * Tags which are null strings _are_ valid, and are different from a
426 * tag simply not being in the table.
428 class GlassTable {
429 friend class GlassCursor; /* Should probably fix this. */
430 friend class GlassFreeList;
431 private:
432 /// Copying not allowed
433 GlassTable(const GlassTable &);
435 /// Assignment not allowed
436 GlassTable & operator=(const GlassTable &);
438 void basic_open(const RootInfo * root_info,
439 glass_revision_number_t rev);
441 /** Perform the opening operation to read. */
442 void do_open_to_read(const RootInfo * root_info,
443 glass_revision_number_t rev);
445 /** Perform the opening operation to write. */
446 void do_open_to_write(const RootInfo * root_info,
447 glass_revision_number_t rev = 0);
449 public:
450 /** Create a new Btree object.
452 * This does not create the table on disk - the create_and_open()
453 * method must be called to create the table on disk.
455 * This also does not open the table - either the create_and_open()
456 * or open() methods must be called before use is made of the table.
458 * @param tablename_ The name of the table (used in changesets).
459 * @param path_ Path at which the table is stored.
460 * @param readonly_ whether to open the table for read only access.
461 * @param lazy If true, don't create the table until it's
462 * needed.
464 GlassTable(const char * tablename_, const std::string & path_,
465 bool readonly_, bool lazy = false);
467 GlassTable(const char * tablename_, int fd, off_t offset_,
468 bool readonly_, bool lazy = false);
470 /** Close the Btree.
472 * Any outstanding changes (ie, changes made without commit() having
473 * subsequently been called) will be lost.
475 ~GlassTable();
477 /** Close the Btree. This closes and frees any of the btree
478 * structures which have been created and opened.
480 * @param permanent If true, the Btree will not reopen on demand.
482 void close(bool permanent = false);
484 bool readahead_key(const string &key) const;
486 /** Determine whether the btree exists on disk.
488 bool exists() const;
490 /** Open the btree.
492 * @param flags_ flags for opening
493 * @param root_info root block info
495 * @exception Xapian::DatabaseCorruptError will be thrown if the table
496 * is in a corrupt state.
497 * @exception Xapian::DatabaseOpeningError will be thrown if the table
498 * cannot be opened (but is not corrupt - eg, permission problems,
499 * not present, etc).
501 void open(int flags_, const RootInfo & root_info,
502 glass_revision_number_t rev);
504 /** Return true if this table is open.
506 * NB If the table is lazy and doesn't yet exist, returns false.
508 bool is_open() const { return handle >= 0; }
510 /** Return true if this table is writable. */
511 bool is_writable() const { return writable; }
513 /** Flush any outstanding changes to the DB file of the table.
515 * This must be called before commit, to ensure that the DB file is
516 * ready to be switched to a new version by the commit.
518 void flush_db();
520 /** Commit any outstanding changes to the table.
522 * Commit changes made by calling add() and del() to the Btree.
524 * If an error occurs during the operation, this will be signalled
525 * by an exception. In case of error, changes made will not be
526 * committed to the Btree - they will be discarded.
528 * @param new_revision The new revision number to store. This must
529 * be greater than the current revision number. FIXME: If
530 * we support rewinding to a previous revision, maybe this
531 * needs to be greater than any previously used revision.
533 * @param root_info Information about the root is returned in this.
535 void commit(glass_revision_number_t revision, RootInfo * root_info);
537 bool sync() {
538 return (flags & Xapian::DB_NO_SYNC) ||
539 handle < 0 ||
540 io_sync(handle);
543 /** Cancel any outstanding changes.
545 * This will discard any modifications which haven't been committed
546 * by calling commit().
548 void cancel(const RootInfo & root_info, glass_revision_number_t rev);
550 /** Read an entry from the table, if and only if it is exactly that
551 * being asked for.
553 * If the key is found in the table, then the tag is copied to @a
554 * tag. If the key is not found tag is left unchanged.
556 * The result is true iff the specified key is found in the Btree.
558 * @param key The key to look for in the table.
559 * @param tag A tag object to fill with the value if found.
561 * @return true if key is found in table,
562 * false if key is not found in table.
564 bool get_exact_entry(const std::string & key, std::string & tag) const;
566 /** Check if a key exists in the Btree.
568 * This is just like get_exact_entry() except it doesn't read the tag
569 * value so is more efficient if you only want to check that the key
570 * exists.
572 * @param key The key to look for in the table.
574 * @return true if key is found in table,
575 * false if key is not found in table.
577 bool key_exists(const std::string &key) const;
579 /** Read the tag value for the key pointed to by cursor C_.
581 * @param keep_compressed Don't uncompress the tag - e.g. useful
582 * if it's just being opaquely copied.
584 * @return true if current_tag holds compressed data (always
585 * false if keep_compressed was false).
587 bool read_tag(Glass::Cursor * C_, std::string *tag, bool keep_compressed) const;
589 /** Add a key/tag pair to the table, replacing any existing pair with
590 * the same key.
592 * If an error occurs during the operation, an exception will be
593 * thrown.
595 * If key is empty, then the null item is replaced.
597 * e.g. btree.add("TODAY", "Mon 9 Oct 2000");
599 * @param key The key to store in the table.
600 * @param tag The tag to store in the table.
601 * @param already_compressed true if tag is already compressed,
602 * for example because it is being opaquely copied
603 * (default: false).
605 void add(const std::string &key, std::string tag, bool already_compressed = false);
607 /** Delete an entry from the table.
609 * The entry will be removed from the table, if it exists. If
610 * it does not exist, no action will be taken. The item with
611 * an empty key can't be removed, and false is returned.
613 * If an error occurs during the operation, this will be signalled
614 * by an exception.
616 * e.g. bool deleted = btree.del("TODAY")
618 * @param key The key to remove from the table.
620 * @return true if an entry was removed; false if it did not exist.
622 bool del(const std::string &key);
624 int get_flags() const { return flags; }
626 /** Create a new empty btree structure on disk and open it at the
627 * initial revision.
629 * The table must be writable - it doesn't make sense to create
630 * a table that is read-only!
632 * The block size must be less than 64K, where K = 1024. It is unwise
633 * to use a small block size (less than 1024 perhaps), so we enforce a
634 * minimum block size of 2K.
636 * Example:
638 * // File will be "X." + GLASS_TABLE_EXTENSION (i.e. "X.glass")
639 * Btree btree("X.");
640 * btree.create_and_open(0, root_info);
642 * @param root_info RootInfo object
644 * @exception Xapian::DatabaseCreateError if the table can't be
645 * created.
646 * @exception Xapian::InvalidArgumentError if the requested blocksize
647 * is unsuitable.
649 void create_and_open(int flags_, const RootInfo & root_info);
651 void set_full_compaction(bool parity);
653 /** Get the revision number at which this table
654 * is currently open.
656 * It is possible that there are other, more recent or older
657 * revisions available.
659 * @return the current revision number.
661 glass_revision_number_t get_open_revision_number() const {
662 return revision_number;
665 /** Return a count of the number of entries in the table.
667 * The count does not include the ever-present item with null key.
669 * Use @a empty() if you only want to know if the table is empty or
670 * not.
672 * @return The number of entries in the table.
674 glass_tablesize_t get_entry_count() const {
675 return item_count;
678 /// Return true if there are no entries in the table.
679 bool empty() const {
680 return (item_count == 0);
683 /** Get a cursor for reading from the table.
685 * The cursor is owned by the caller - it is the caller's
686 * responsibility to ensure that it is deleted.
688 GlassCursor * cursor_get() const;
690 /** Determine whether the object contains uncommitted modifications.
692 * @return true if there have been modifications since the last
693 * the last call to commit().
695 bool is_modified() const { return Btree_modified; }
697 /** Set the maximum item size given the block capacity.
699 * At least this many items of maximum size must fit into a block.
700 * The default is BLOCK_CAPACITY (which is currently 4).
702 void set_max_item_size(size_t block_capacity) {
703 if (block_capacity > Glass::BLOCK_CAPACITY)
704 block_capacity = Glass::BLOCK_CAPACITY;
705 using Glass::DIR_START;
706 using Glass::D2;
707 max_item_size = (block_size - DIR_START - block_capacity * D2)
708 / block_capacity;
709 // Make sure we don't exceed the limit imposed by the format.
710 if (max_item_size > Glass::MAX_ITEM_SIZE)
711 max_item_size = Glass::MAX_ITEM_SIZE;
714 /** Set the GlassChanges object to write changed blocks to.
716 * The GlassChanges object is not owned by the table, so the table
717 * must not delete it.
719 void set_changes(GlassChanges * changes) {
720 changes_obj = changes;
723 /// Throw an exception indicating that the database is closed.
724 XAPIAN_NORETURN(static void throw_database_closed());
726 string get_path() const {
727 return name + GLASS_TABLE_EXTENSION;
730 protected:
732 bool find(Glass::Cursor *) const;
733 int delete_kt();
734 void read_block(uint4 n, byte *p) const;
735 void write_block(uint4 n, const byte *p, bool appending = false) const;
736 XAPIAN_NORETURN(void set_overwritten() const);
737 void block_to_cursor(Glass::Cursor *C_, int j, uint4 n) const;
738 void alter();
739 void compact(byte *p);
740 void enter_key_above_leaf(Glass::LeafItem previtem, Glass::LeafItem newitem);
741 void enter_key_above_branch(int j, Glass::BItem newitem);
742 int mid_point(byte *p);
743 void add_item_to_leaf(byte *p, Glass::LeafItem kt, int c);
744 void add_item_to_branch(byte *p, Glass::BItem kt, int c);
745 void add_leaf_item(Glass::LeafItem kt);
746 void add_branch_item(Glass::BItem kt, int j);
747 void delete_leaf_item(bool repeatedly);
748 void delete_branch_item(int j);
749 int add_kt(bool found);
750 void read_root();
751 void split_root(uint4 split_n);
752 void form_key(const std::string & key) const;
754 /// The name of the table (used when writing changesets).
755 const char * tablename;
757 /** revision number of the opened B-tree. */
758 glass_revision_number_t revision_number;
760 /** keeps a count of the number of items in the B-tree. */
761 glass_tablesize_t item_count;
763 /** block size of the B tree in bytes */
764 unsigned int block_size;
766 /** Flags like DB_NO_SYNC and DB_DANGEROUS. */
767 int flags;
769 /** true if the root block is faked (not written to disk).
770 * false otherwise. This is true when the btree hasn't been
771 * modified yet.
773 bool faked_root_block;
775 /** true iff the data has been written in a single write in
776 * sequential order.
778 bool sequential;
780 /** File descriptor of the table.
782 * If close() has been called, this will be -2.
784 * If the table is lazily created and doesn't yet exist, this will be
785 * -1 (for a multi-file database) or -3-fd (for a single-file database).
787 int handle;
789 /// number of levels, counting from 0
790 int level;
792 /// the root block of the B-tree
793 uint4 root;
795 /// buffer of size block_size for making up key-tag items
796 mutable Glass::LeafItem_wr kt;
798 /// buffer of size block_size for reforming blocks
799 byte * buffer;
801 /// List of free blocks.
802 GlassFreeList free_list;
804 /** The path name of the B tree.
806 * For a single-file database, this will be empty.
808 std::string name;
810 /** count of the number of successive instances of purely
811 * sequential addition, starting at SEQ_START_POINT (neg) and
812 * going up to zero. */
813 int seq_count;
815 /** the last block to be changed by an addition */
816 uint4 changed_n;
818 /** directory offset corresponding to last block to be changed
819 * by an addition */
820 int changed_c;
822 /// maximum size of an item (key-tag pair)
823 size_t max_item_size;
825 /// Set to true the first time the B-tree is modified.
826 mutable bool Btree_modified;
828 /// set to true when full compaction is to be achieved
829 bool full_compaction;
831 /// Set to true when the database is opened to write.
832 bool writable;
834 /// Flag for tracking when cursors need to rebuild.
835 mutable bool cursor_created_since_last_modification;
837 /// Version count for tracking when cursors need to rebuild.
838 unsigned long cursor_version;
840 /** The GlassChanges object to write block changes to.
842 * If NULL, no changes will be written.
844 GlassChanges * changes_obj;
846 bool single_file() const {
847 return name.empty();
850 /* B-tree navigation functions */
851 bool prev(Glass::Cursor *C_, int j) const {
852 if (sequential && !single_file())
853 return prev_for_sequential(C_, j);
854 return prev_default(C_, j);
857 bool next(Glass::Cursor *C_, int j) const {
858 if (sequential) return next_for_sequential(C_, j);
859 return next_default(C_, j);
862 /* Default implementations. */
863 bool prev_default(Glass::Cursor *C_, int j) const;
864 bool next_default(Glass::Cursor *C_, int j) const;
866 /* Implementations for sequential mode. */
867 bool prev_for_sequential(Glass::Cursor *C_, int dummy) const;
868 bool next_for_sequential(Glass::Cursor *C_, int dummy) const;
870 static int find_in_leaf(const byte * p, Glass::LeafItem item, int c, bool& exact);
871 static int find_in_branch(const byte * p, Glass::LeafItem item, int c);
872 static int find_in_branch(const byte * p, Glass::BItem item, int c);
874 /** block_given_by(p, c) finds the item at block address p, directory
875 * offset c, and returns its tag value as an integer.
877 static uint4 block_given_by(const byte * p, int c);
879 mutable Glass::Cursor C[Glass::BTREE_CURSOR_LEVELS];
881 /** Buffer used when splitting a block.
883 * This buffer holds the split off part of the block. It's only used
884 * when updating (in GlassTable::add_item().
886 byte * split_p;
888 /** Minimum size tag to try compressing (0 for no compression). */
889 uint4 compress_min;
891 mutable CompressionStream comp_stream;
893 /// If true, don't create the table until it's needed.
894 bool lazy;
896 /// Last block readahead_key() preread.
897 mutable uint4 last_readahead;
899 /// offset to start of table in file.
900 off_t offset;
902 /* Debugging methods */
903 // void report_block_full(int m, int n, const byte * p);
906 namespace Glass {
908 /** Compare two items by their keys.
910 The result is negative if a precedes b, positive is b precedes a, and
911 0 if a and b are equal. The comparison is for byte sequence collating
912 order, taking lengths into account. So if the keys are made up of lower case
913 ASCII letters we get alphabetical ordering.
915 Now remember that items are added into the B-tree in fastest time
916 when they are preordered by their keys. This is therefore the piece
917 of code that needs to be followed to arrange for the preordering.
919 Note that keys have two parts - a string value and a "component_of" count.
920 If the string values are equal, the comparison is made based on
921 "component_of".
924 template<typename ITEM1, typename ITEM2>
925 int compare(ITEM1 a, ITEM2 b)
927 Key key1 = a.key();
928 Key key2 = b.key();
929 const byte* p1 = key1.data();
930 const byte* p2 = key2.data();
931 int key1_len = key1.length();
932 int key2_len = key2.length();
933 int k_smaller = (key2_len < key1_len ? key2_len : key1_len);
935 // Compare the common part of the keys.
936 int diff = std::memcmp(p1, p2, k_smaller);
937 if (diff == 0) {
938 // If the common part matches, compare the lengths.
939 diff = key1_len - key2_len;
940 if (diff == 0) {
941 // If the strings match, compare component_of().
942 diff = a.component_of() - b.component_of();
945 return diff;
948 /** Compare two BItem objects by their keys.
950 * Specialisation for comparing two BItems, where component_of is always
951 * explicitly stored.
953 inline int
954 compare(BItem a, BItem b)
956 Key key1 = a.key();
957 Key key2 = b.key();
958 const byte* p1 = key1.data();
959 const byte* p2 = key2.data();
960 int key1_len = key1.length();
961 int key2_len = key2.length();
962 if (key1_len == key2_len) {
963 // The keys are the same length, so we can compare the counts in the
964 // same operation since they're stored as 2 byte bigendian numbers.
965 int len = key1_len + X2;
966 return std::memcmp(p1, p2, len);
969 int k_smaller = (key2_len < key1_len ? key2_len : key1_len);
971 // Compare the common part of the keys.
972 int diff = std::memcmp(p1, p2, k_smaller);
973 if (diff == 0) {
974 // If the common part matches, compare the lengths.
975 diff = key1_len - key2_len;
976 // We dealt with the "same length" case above so we never need to check
977 // component_of here.
979 return diff;
984 #endif /* OM_HGUARD_GLASS_TABLE_H */