mySQL 5.0.11 sources for tomato
[tomato.git] / release / src / router / mysql / storage / innodb_plugin / ibuf / ibuf0ibuf.c
blob5d018bcdbc981c1dbd5c64c2a4fdcf3dd67b654f
1 /*****************************************************************************
3 Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc.,
15 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17 *****************************************************************************/
19 /**************************************************//**
20 @file ibuf/ibuf0ibuf.c
21 Insert buffer
23 Created 7/19/1997 Heikki Tuuri
24 *******************************************************/
26 #include "ibuf0ibuf.h"
28 /** Number of bits describing a single page */
29 #define IBUF_BITS_PER_PAGE 4
30 #if IBUF_BITS_PER_PAGE % 2
31 # error "IBUF_BITS_PER_PAGE must be an even number!"
32 #endif
33 /** The start address for an insert buffer bitmap page bitmap */
34 #define IBUF_BITMAP PAGE_DATA
36 #ifdef UNIV_NONINL
37 #include "ibuf0ibuf.ic"
38 #endif
40 #ifndef UNIV_HOTBACKUP
42 #include "buf0buf.h"
43 #include "buf0rea.h"
44 #include "fsp0fsp.h"
45 #include "trx0sys.h"
46 #include "fil0fil.h"
47 #include "thr0loc.h"
48 #include "rem0rec.h"
49 #include "btr0cur.h"
50 #include "btr0pcur.h"
51 #include "btr0btr.h"
52 #include "row0upd.h"
53 #include "sync0sync.h"
54 #include "dict0boot.h"
55 #include "fut0lst.h"
56 #include "lock0lock.h"
57 #include "log0recv.h"
58 #include "que0que.h"
59 #include "rem0cmp.h"
61 /* STRUCTURE OF AN INSERT BUFFER RECORD
63 In versions < 4.1.x:
65 1. The first field is the page number.
66 2. The second field is an array which stores type info for each subsequent
67 field. We store the information which affects the ordering of records, and
68 also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it
69 is 10 bytes.
70 3. Next we have the fields of the actual index record.
72 In versions >= 4.1.x:
74 Note that contrary to what we planned in the 1990's, there will only be one
75 insert buffer tree, and that is in the system tablespace of InnoDB.
77 1. The first field is the space id.
78 2. The second field is a one-byte marker (0) which differentiates records from
79 the < 4.1.x storage format.
80 3. The third field is the page number.
81 4. The fourth field contains the type info, where we have also added 2 bytes to
82 store the charset. In the compressed table format of 5.0.x we must add more
83 information here so that we can build a dummy 'index' struct which 5.0.x
84 can use in the binary search on the index page in the ibuf merge phase.
85 5. The rest of the fields contain the fields of the actual index record.
87 In versions >= 5.0.3:
89 The first byte of the fourth field is an additional marker (0) if the record
90 is in the compact format. The presence of this marker can be detected by
91 looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE.
93 The high-order bit of the character set field in the type info is the
94 "nullable" flag for the field. */
97 /* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
99 If an OS thread performs any operation that brings in disk pages from
100 non-system tablespaces into the buffer pool, or creates such a page there,
101 then the operation may have as a side effect an insert buffer index tree
102 compression. Thus, the tree latch of the insert buffer tree may be acquired
103 in the x-mode, and also the file space latch of the system tablespace may
104 be acquired in the x-mode.
106 Also, an insert to an index in a non-system tablespace can have the same
107 effect. How do we know this cannot lead to a deadlock of OS threads? There
108 is a problem with the i/o-handler threads: they break the latching order
109 because they own x-latches to pages which are on a lower level than the
110 insert buffer tree latch, its page latches, and the tablespace latch an
111 insert buffer operation can reserve.
113 The solution is the following: Let all the tree and page latches connected
114 with the insert buffer be later in the latching order than the fsp latch and
115 fsp page latches.
117 Insert buffer pages must be such that the insert buffer is never invoked
118 when these pages are accessed as this would result in a recursion violating
119 the latching order. We let a special i/o-handler thread take care of i/o to
120 the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
121 pages and the first inode page, which contains the inode of the ibuf tree: let
122 us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead
123 access both non-ibuf and ibuf pages.
125 Then an i/o-handler for the insert buffer never needs to access recursively the
126 insert buffer tree and thus obeys the latching order. On the other hand, other
127 i/o-handlers for other tablespaces may require access to the insert buffer,
128 but because all kinds of latches they need to access there are later in the
129 latching order, no violation of the latching order occurs in this case,
130 either.
132 A problem is how to grow and contract an insert buffer tree. As it is later
133 in the latching order than the fsp management, we have to reserve the fsp
134 latch first, before adding or removing pages from the insert buffer tree.
135 We let the insert buffer tree have its own file space management: a free
136 list of pages linked to the tree root. To prevent recursive using of the
137 insert buffer when adding pages to the tree, we must first load these pages
138 to memory, obtaining a latch on them, and only after that add them to the
139 free list of the insert buffer tree. More difficult is removing of pages
140 from the free list. If there is an excess of pages in the free list of the
141 ibuf tree, they might be needed if some thread reserves the fsp latch,
142 intending to allocate more file space. So we do the following: if a thread
143 reserves the fsp latch, we check the writer count field of the latch. If
144 this field has value 1, it means that the thread did not own the latch
145 before entering the fsp system, and the mtr of the thread contains no
146 modifications to the fsp pages. Now we are free to reserve the ibuf latch,
147 and check if there is an excess of pages in the free list. We can then, in a
148 separate mini-transaction, take them out of the free list and free them to
149 the fsp system.
151 To avoid deadlocks in the ibuf system, we divide file pages into three levels:
153 (1) non-ibuf pages,
154 (2) ibuf tree pages and the pages in the ibuf tree free list, and
155 (3) ibuf bitmap pages.
157 No OS thread is allowed to access higher level pages if it has latches to
158 lower level pages; even if the thread owns a B-tree latch it must not access
159 the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
160 is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
161 exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively
162 level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
163 it uses synchronous aio, it can access any pages, as long as it obeys the
164 access order rules. */
166 /** Buffer pool size per the maximum insert buffer size */
167 #define IBUF_POOL_SIZE_PER_MAX_SIZE 2
169 /** Table name for the insert buffer. */
170 #define IBUF_TABLE_NAME "SYS_IBUF_TABLE"
172 /** Operations that can currently be buffered. */
173 UNIV_INTERN ibuf_use_t ibuf_use = IBUF_USE_INSERT;
175 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
176 /** Flag to control insert buffer debugging. */
177 UNIV_INTERN uint ibuf_debug;
178 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
180 /** The insert buffer control structure */
181 UNIV_INTERN ibuf_t* ibuf = NULL;
183 /** Counter for ibuf_should_try() */
184 UNIV_INTERN ulint ibuf_flush_count = 0;
186 #ifdef UNIV_IBUF_COUNT_DEBUG
187 /** Number of tablespaces in the ibuf_counts array */
188 #define IBUF_COUNT_N_SPACES 4
189 /** Number of pages within each tablespace in the ibuf_counts array */
190 #define IBUF_COUNT_N_PAGES 130000
192 /** Buffered entry counts for file pages, used in debugging */
193 static ulint ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
195 /******************************************************************//**
196 Checks that the indexes to ibuf_counts[][] are within limits. */
197 UNIV_INLINE
198 void
199 ibuf_count_check(
200 /*=============*/
201 ulint space_id, /*!< in: space identifier */
202 ulint page_no) /*!< in: page number */
204 if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) {
205 return;
208 fprintf(stderr,
209 "InnoDB: UNIV_IBUF_COUNT_DEBUG limits space_id and page_no\n"
210 "InnoDB: and breaks crash recovery.\n"
211 "InnoDB: space_id=%lu, should be 0<=space_id<%lu\n"
212 "InnoDB: page_no=%lu, should be 0<=page_no<%lu\n",
213 (ulint) space_id, (ulint) IBUF_COUNT_N_SPACES,
214 (ulint) page_no, (ulint) IBUF_COUNT_N_PAGES);
215 ut_error;
217 #endif
219 /** @name Offsets to the per-page bits in the insert buffer bitmap */
220 /* @{ */
221 #define IBUF_BITMAP_FREE 0 /*!< Bits indicating the
222 amount of free space */
223 #define IBUF_BITMAP_BUFFERED 2 /*!< TRUE if there are buffered
224 changes for the page */
225 #define IBUF_BITMAP_IBUF 3 /*!< TRUE if page is a part of
226 the ibuf tree, excluding the
227 root page, or is in the free
228 list of the ibuf */
229 /* @} */
231 /** The mutex used to block pessimistic inserts to ibuf trees */
232 static mutex_t ibuf_pessimistic_insert_mutex;
234 /** The mutex protecting the insert buffer structs */
235 static mutex_t ibuf_mutex;
237 /** The mutex protecting the insert buffer bitmaps */
238 static mutex_t ibuf_bitmap_mutex;
240 /** The area in pages from which contract looks for page numbers for merge */
241 #define IBUF_MERGE_AREA 8
243 /** Inside the merge area, pages which have at most 1 per this number less
244 buffered entries compared to maximum volume that can be buffered for a single
245 page are merged along with the page whose buffer became full */
246 #define IBUF_MERGE_THRESHOLD 4
248 /** In ibuf_contract at most this number of pages is read to memory in one
249 batch, in order to merge the entries for them in the insert buffer */
250 #define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA
252 /** If the combined size of the ibuf trees exceeds ibuf->max_size by this
253 many pages, we start to contract it in connection to inserts there, using
254 non-synchronous contract */
255 #define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0
257 /** If the combined size of the ibuf trees exceeds ibuf->max_size by this
258 many pages, we start to contract it in connection to inserts there, using
259 synchronous contract */
260 #define IBUF_CONTRACT_ON_INSERT_SYNC 5
262 /** If the combined size of the ibuf trees exceeds ibuf->max_size by
263 this many pages, we start to contract it using synchronous contract, but do
264 not insert */
265 #define IBUF_CONTRACT_DO_NOT_INSERT 10
267 /* TODO: how to cope with drop table if there are records in the insert
268 buffer for the indexes of the table? Is there actually any problem,
269 because ibuf merge is done to a page when it is read in, and it is
270 still physically like the index page even if the index would have been
271 dropped! So, there seems to be no problem. */
273 /******************************************************************//**
274 Sets the flag in the current OS thread local storage denoting that it is
275 inside an insert buffer routine. */
276 UNIV_INLINE
277 void
278 ibuf_enter(void)
279 /*============*/
281 ibool* ptr;
283 ptr = thr_local_get_in_ibuf_field();
285 ut_ad(*ptr == FALSE);
287 *ptr = TRUE;
290 /******************************************************************//**
291 Sets the flag in the current OS thread local storage denoting that it is
292 exiting an insert buffer routine. */
293 UNIV_INLINE
294 void
295 ibuf_exit(void)
296 /*===========*/
298 ibool* ptr;
300 ptr = thr_local_get_in_ibuf_field();
302 ut_ad(*ptr == TRUE);
304 *ptr = FALSE;
307 /******************************************************************//**
308 Returns TRUE if the current OS thread is performing an insert buffer
309 routine.
311 For instance, a read-ahead of non-ibuf pages is forbidden by threads
312 that are executing an insert buffer routine.
313 @return TRUE if inside an insert buffer routine */
314 UNIV_INTERN
315 ibool
316 ibuf_inside(void)
317 /*=============*/
319 return(*thr_local_get_in_ibuf_field());
322 /******************************************************************//**
323 Gets the ibuf header page and x-latches it.
324 @return insert buffer header page */
325 static
326 page_t*
327 ibuf_header_page_get(
328 /*=================*/
329 mtr_t* mtr) /*!< in: mtr */
331 buf_block_t* block;
333 ut_ad(!ibuf_inside());
335 block = buf_page_get(
336 IBUF_SPACE_ID, 0, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
337 buf_block_dbg_add_level(block, SYNC_IBUF_HEADER);
339 return(buf_block_get_frame(block));
342 /******************************************************************//**
343 Gets the root page and x-latches it.
344 @return insert buffer tree root page */
345 static
346 page_t*
347 ibuf_tree_root_get(
348 /*===============*/
349 mtr_t* mtr) /*!< in: mtr */
351 buf_block_t* block;
353 ut_ad(ibuf_inside());
355 mtr_x_lock(dict_index_get_lock(ibuf->index), mtr);
357 block = buf_page_get(
358 IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, mtr);
360 buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
362 return(buf_block_get_frame(block));
365 #ifdef UNIV_IBUF_COUNT_DEBUG
366 /******************************************************************//**
367 Gets the ibuf count for a given page.
368 @return number of entries in the insert buffer currently buffered for
369 this page */
370 UNIV_INTERN
371 ulint
372 ibuf_count_get(
373 /*===========*/
374 ulint space, /*!< in: space id */
375 ulint page_no)/*!< in: page number */
377 ibuf_count_check(space, page_no);
379 return(ibuf_counts[space][page_no]);
382 /******************************************************************//**
383 Sets the ibuf count for a given page. */
384 static
385 void
386 ibuf_count_set(
387 /*===========*/
388 ulint space, /*!< in: space id */
389 ulint page_no,/*!< in: page number */
390 ulint val) /*!< in: value to set */
392 ibuf_count_check(space, page_no);
393 ut_a(val < UNIV_PAGE_SIZE);
395 ibuf_counts[space][page_no] = val;
397 #endif
399 /******************************************************************//**
400 Closes insert buffer and frees the data structures. */
401 UNIV_INTERN
402 void
403 ibuf_close(void)
404 /*============*/
406 mutex_free(&ibuf_pessimistic_insert_mutex);
407 memset(&ibuf_pessimistic_insert_mutex,
408 0x0, sizeof(ibuf_pessimistic_insert_mutex));
410 mutex_free(&ibuf_mutex);
411 memset(&ibuf_mutex, 0x0, sizeof(ibuf_mutex));
413 mutex_free(&ibuf_bitmap_mutex);
414 memset(&ibuf_bitmap_mutex, 0x0, sizeof(ibuf_mutex));
416 mem_free(ibuf);
417 ibuf = NULL;
420 /******************************************************************//**
421 Updates the size information of the ibuf, assuming the segment size has not
422 changed. */
423 static
424 void
425 ibuf_size_update(
426 /*=============*/
427 const page_t* root, /*!< in: ibuf tree root */
428 mtr_t* mtr) /*!< in: mtr */
430 ut_ad(mutex_own(&ibuf_mutex));
432 ibuf->free_list_len = flst_get_len(root + PAGE_HEADER
433 + PAGE_BTR_IBUF_FREE_LIST, mtr);
435 ibuf->height = 1 + btr_page_get_level(root, mtr);
437 /* the '1 +' is the ibuf header page */
438 ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
440 ibuf->empty = page_get_n_recs(root) == 0;
443 /******************************************************************//**
444 Creates the insert buffer data structure at a database startup and initializes
445 the data structures for the insert buffer. */
446 UNIV_INTERN
447 void
448 ibuf_init_at_db_start(void)
449 /*=======================*/
451 page_t* root;
452 mtr_t mtr;
453 dict_table_t* table;
454 mem_heap_t* heap;
455 dict_index_t* index;
456 ulint n_used;
457 page_t* header_page;
458 ulint error;
460 ibuf = mem_alloc(sizeof(ibuf_t));
462 memset(ibuf, 0, sizeof(*ibuf));
464 /* Note that also a pessimistic delete can sometimes make a B-tree
465 grow in size, as the references on the upper levels of the tree can
466 change */
468 ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE
469 / IBUF_POOL_SIZE_PER_MAX_SIZE;
471 mutex_create(&ibuf_pessimistic_insert_mutex,
472 SYNC_IBUF_PESS_INSERT_MUTEX);
474 mutex_create(&ibuf_mutex, SYNC_IBUF_MUTEX);
476 mutex_create(&ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
478 mtr_start(&mtr);
480 mutex_enter(&ibuf_mutex);
482 mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, NULL), &mtr);
484 header_page = ibuf_header_page_get(&mtr);
486 fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
487 &n_used, &mtr);
488 ibuf_enter();
490 ut_ad(n_used >= 2);
492 ibuf->seg_size = n_used;
495 buf_block_t* block;
497 block = buf_page_get(
498 IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO,
499 RW_X_LATCH, &mtr);
500 buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
502 root = buf_block_get_frame(block);
505 ibuf_size_update(root, &mtr);
506 mutex_exit(&ibuf_mutex);
508 mtr_commit(&mtr);
510 ibuf_exit();
512 heap = mem_heap_create(450);
514 /* Use old-style record format for the insert buffer. */
515 table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0);
517 dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);
519 table->id = ut_dulint_add(DICT_IBUF_ID_MIN, IBUF_SPACE_ID);
521 dict_table_add_to_cache(table, heap);
522 mem_heap_free(heap);
524 index = dict_mem_index_create(
525 IBUF_TABLE_NAME, "CLUST_IND",
526 IBUF_SPACE_ID, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 1);
528 dict_mem_index_add_field(index, "DUMMY_COLUMN", 0);
530 index->id = ut_dulint_add(DICT_IBUF_ID_MIN, IBUF_SPACE_ID);
532 error = dict_index_add_to_cache(table, index,
533 FSP_IBUF_TREE_ROOT_PAGE_NO, FALSE);
534 ut_a(error == DB_SUCCESS);
536 ibuf->index = dict_table_get_first_index(table);
538 #endif /* !UNIV_HOTBACKUP */
539 /*********************************************************************//**
540 Initializes an ibuf bitmap page. */
541 UNIV_INTERN
542 void
543 ibuf_bitmap_page_init(
544 /*==================*/
545 buf_block_t* block, /*!< in: bitmap page */
546 mtr_t* mtr) /*!< in: mtr */
548 page_t* page;
549 ulint byte_offset;
550 ulint zip_size = buf_block_get_zip_size(block);
552 ut_a(ut_is_2pow(zip_size));
554 page = buf_block_get_frame(block);
555 fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
557 /* Write all zeros to the bitmap */
559 if (!zip_size) {
560 byte_offset = UT_BITS_IN_BYTES(UNIV_PAGE_SIZE
561 * IBUF_BITS_PER_PAGE);
562 } else {
563 byte_offset = UT_BITS_IN_BYTES(zip_size * IBUF_BITS_PER_PAGE);
566 memset(page + IBUF_BITMAP, 0, byte_offset);
568 /* The remaining area (up to the page trailer) is uninitialized. */
570 #ifndef UNIV_HOTBACKUP
571 mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr);
572 #endif /* !UNIV_HOTBACKUP */
575 /*********************************************************************//**
576 Parses a redo log record of an ibuf bitmap page init.
577 @return end of log record or NULL */
578 UNIV_INTERN
579 byte*
580 ibuf_parse_bitmap_init(
581 /*===================*/
582 byte* ptr, /*!< in: buffer */
583 byte* end_ptr __attribute__((unused)), /*!< in: buffer end */
584 buf_block_t* block, /*!< in: block or NULL */
585 mtr_t* mtr) /*!< in: mtr or NULL */
587 ut_ad(ptr && end_ptr);
589 if (block) {
590 ibuf_bitmap_page_init(block, mtr);
593 return(ptr);
595 #ifndef UNIV_HOTBACKUP
596 /********************************************************************//**
597 Gets the desired bits for a given page from a bitmap page.
598 @return value of bits */
599 UNIV_INLINE
600 ulint
601 ibuf_bitmap_page_get_bits(
602 /*======================*/
603 const page_t* page, /*!< in: bitmap page */
604 ulint page_no,/*!< in: page whose bits to get */
605 ulint zip_size,/*!< in: compressed page size in bytes;
606 0 for uncompressed pages */
607 ulint bit, /*!< in: IBUF_BITMAP_FREE,
608 IBUF_BITMAP_BUFFERED, ... */
609 mtr_t* mtr __attribute__((unused)))
610 /*!< in: mtr containing an
611 x-latch to the bitmap page */
613 ulint byte_offset;
614 ulint bit_offset;
615 ulint map_byte;
616 ulint value;
618 ut_ad(bit < IBUF_BITS_PER_PAGE);
619 #if IBUF_BITS_PER_PAGE % 2
620 # error "IBUF_BITS_PER_PAGE % 2 != 0"
621 #endif
622 ut_ad(ut_is_2pow(zip_size));
623 ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
625 if (!zip_size) {
626 bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
627 + bit;
628 } else {
629 bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
630 + bit;
633 byte_offset = bit_offset / 8;
634 bit_offset = bit_offset % 8;
636 ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
638 map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
640 value = ut_bit_get_nth(map_byte, bit_offset);
642 if (bit == IBUF_BITMAP_FREE) {
643 ut_ad(bit_offset + 1 < 8);
645 value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1);
648 return(value);
651 /********************************************************************//**
652 Sets the desired bit for a given page in a bitmap page. */
653 static
654 void
655 ibuf_bitmap_page_set_bits(
656 /*======================*/
657 page_t* page, /*!< in: bitmap page */
658 ulint page_no,/*!< in: page whose bits to set */
659 ulint zip_size,/*!< in: compressed page size in bytes;
660 0 for uncompressed pages */
661 ulint bit, /*!< in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
662 ulint val, /*!< in: value to set */
663 mtr_t* mtr) /*!< in: mtr containing an x-latch to the bitmap page */
665 ulint byte_offset;
666 ulint bit_offset;
667 ulint map_byte;
669 ut_ad(bit < IBUF_BITS_PER_PAGE);
670 #if IBUF_BITS_PER_PAGE % 2
671 # error "IBUF_BITS_PER_PAGE % 2 != 0"
672 #endif
673 ut_ad(ut_is_2pow(zip_size));
674 ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
675 #ifdef UNIV_IBUF_COUNT_DEBUG
676 ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
677 || (0 == ibuf_count_get(page_get_space_id(page),
678 page_no)));
679 #endif
680 if (!zip_size) {
681 bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
682 + bit;
683 } else {
684 bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
685 + bit;
688 byte_offset = bit_offset / 8;
689 bit_offset = bit_offset % 8;
691 ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
693 map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
695 if (bit == IBUF_BITMAP_FREE) {
696 ut_ad(bit_offset + 1 < 8);
697 ut_ad(val <= 3);
699 map_byte = ut_bit_set_nth(map_byte, bit_offset, val / 2);
700 map_byte = ut_bit_set_nth(map_byte, bit_offset + 1, val % 2);
701 } else {
702 ut_ad(val <= 1);
703 map_byte = ut_bit_set_nth(map_byte, bit_offset, val);
706 mlog_write_ulint(page + IBUF_BITMAP + byte_offset, map_byte,
707 MLOG_1BYTE, mtr);
710 /********************************************************************//**
711 Calculates the bitmap page number for a given page number.
712 @return the bitmap page number where the file page is mapped */
713 UNIV_INLINE
714 ulint
715 ibuf_bitmap_page_no_calc(
716 /*=====================*/
717 ulint zip_size, /*!< in: compressed page size in bytes;
718 0 for uncompressed pages */
719 ulint page_no) /*!< in: tablespace page number */
721 ut_ad(ut_is_2pow(zip_size));
723 if (!zip_size) {
724 return(FSP_IBUF_BITMAP_OFFSET
725 + (page_no & ~(UNIV_PAGE_SIZE - 1)));
726 } else {
727 return(FSP_IBUF_BITMAP_OFFSET
728 + (page_no & ~(zip_size - 1)));
732 /********************************************************************//**
733 Gets the ibuf bitmap page where the bits describing a given file page are
734 stored.
735 @return bitmap page where the file page is mapped, that is, the bitmap
736 page containing the descriptor bits for the file page; the bitmap page
737 is x-latched */
738 static
739 page_t*
740 ibuf_bitmap_get_map_page_func(
741 /*==========================*/
742 ulint space, /*!< in: space id of the file page */
743 ulint page_no,/*!< in: page number of the file page */
744 ulint zip_size,/*!< in: compressed page size in bytes;
745 0 for uncompressed pages */
746 const char* file, /*!< in: file name */
747 ulint line, /*!< in: line where called */
748 mtr_t* mtr) /*!< in: mtr */
750 buf_block_t* block;
752 block = buf_page_get_gen(space, zip_size,
753 ibuf_bitmap_page_no_calc(zip_size, page_no),
754 RW_X_LATCH, NULL, BUF_GET,
755 file, line, mtr);
756 buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP);
758 return(buf_block_get_frame(block));
/********************************************************************//**
Convenience wrapper around ibuf_bitmap_get_map_page_func() that supplies
the file name and line number of the call site.
@return bitmap page where the file page is mapped, that is, the bitmap
page containing the descriptor bits for the file page; the bitmap page
is x-latched
@param space	in: space id of the file page
@param page_no	in: page number of the file page
@param zip_size	in: compressed page size in bytes; 0 for uncompressed pages
@param mtr	in: mini-transaction */
#define ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr)	\
	ibuf_bitmap_get_map_page_func(				\
		space, page_no, zip_size,			\
		__FILE__, __LINE__, mtr)
775 /************************************************************************//**
776 Sets the free bits of the page in the ibuf bitmap. This is done in a separate
777 mini-transaction, hence this operation does not restrict further work to only
778 ibuf bitmap operations, which would result if the latch to the bitmap page
779 were kept. */
780 UNIV_INLINE
781 void
782 ibuf_set_free_bits_low(
783 /*===================*/
784 ulint zip_size,/*!< in: compressed page size in bytes;
785 0 for uncompressed pages */
786 const buf_block_t* block, /*!< in: index page; free bits are set if
787 the index is non-clustered and page
788 level is 0 */
789 ulint val, /*!< in: value to set: < 4 */
790 mtr_t* mtr) /*!< in/out: mtr */
792 page_t* bitmap_page;
793 ulint space;
794 ulint page_no;
796 if (!page_is_leaf(buf_block_get_frame(block))) {
798 return;
801 space = buf_block_get_space(block);
802 page_no = buf_block_get_page_no(block);
803 bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
804 #ifdef UNIV_IBUF_DEBUG
805 # if 0
806 fprintf(stderr,
807 "Setting space %lu page %lu free bits to %lu should be %lu\n",
808 space, page_no, val,
809 ibuf_index_page_calc_free(zip_size, block));
810 # endif
812 ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
813 #endif /* UNIV_IBUF_DEBUG */
814 ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
815 IBUF_BITMAP_FREE, val, mtr);
818 /************************************************************************//**
819 Sets the free bit of the page in the ibuf bitmap. This is done in a separate
820 mini-transaction, hence this operation does not restrict further work to only
821 ibuf bitmap operations, which would result if the latch to the bitmap page
822 were kept. */
823 UNIV_INTERN
824 void
825 ibuf_set_free_bits_func(
826 /*====================*/
827 buf_block_t* block, /*!< in: index page of a non-clustered index;
828 free bit is reset if page level is 0 */
829 #ifdef UNIV_IBUF_DEBUG
830 ulint max_val,/*!< in: ULINT_UNDEFINED or a maximum
831 value which the bits must have before
832 setting; this is for debugging */
833 #endif /* UNIV_IBUF_DEBUG */
834 ulint val) /*!< in: value to set: < 4 */
836 mtr_t mtr;
837 page_t* page;
838 page_t* bitmap_page;
839 ulint space;
840 ulint page_no;
841 ulint zip_size;
843 page = buf_block_get_frame(block);
845 if (!page_is_leaf(page)) {
847 return;
850 mtr_start(&mtr);
852 space = buf_block_get_space(block);
853 page_no = buf_block_get_page_no(block);
854 zip_size = buf_block_get_zip_size(block);
855 bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, &mtr);
857 #ifdef UNIV_IBUF_DEBUG
858 if (max_val != ULINT_UNDEFINED) {
859 ulint old_val;
861 old_val = ibuf_bitmap_page_get_bits(
862 bitmap_page, page_no, zip_size,
863 IBUF_BITMAP_FREE, &mtr);
864 # if 0
865 if (old_val != max_val) {
866 fprintf(stderr,
867 "Ibuf: page %lu old val %lu max val %lu\n",
868 page_get_page_no(page),
869 old_val, max_val);
871 # endif
873 ut_a(old_val <= max_val);
875 # if 0
876 fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n",
877 page_get_page_no(page), val,
878 ibuf_index_page_calc_free(zip_size, block));
879 # endif
881 ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
882 #endif /* UNIV_IBUF_DEBUG */
883 ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
884 IBUF_BITMAP_FREE, val, &mtr);
885 mtr_commit(&mtr);
888 /************************************************************************//**
889 Resets the free bits of the page in the ibuf bitmap. This is done in a
890 separate mini-transaction, hence this operation does not restrict
891 further work to only ibuf bitmap operations, which would result if the
892 latch to the bitmap page were kept. NOTE: The free bits in the insert
893 buffer bitmap must never exceed the free space on a page. It is safe
894 to decrement or reset the bits in the bitmap in a mini-transaction
895 that is committed before the mini-transaction that affects the free
896 space. */
897 UNIV_INTERN
898 void
899 ibuf_reset_free_bits(
900 /*=================*/
901 buf_block_t* block) /*!< in: index page; free bits are set to 0
902 if the index is a non-clustered
903 non-unique, and page level is 0 */
905 ibuf_set_free_bits(block, 0, ULINT_UNDEFINED);
908 /**********************************************************************//**
909 Updates the free bits for an uncompressed page to reflect the present
910 state. Does this in the mtr given, which means that the latching
911 order rules virtually prevent any further operations for this OS
912 thread until mtr is committed. NOTE: The free bits in the insert
913 buffer bitmap must never exceed the free space on a page. It is safe
914 to set the free bits in the same mini-transaction that updated the
915 page. */
916 UNIV_INTERN
917 void
918 ibuf_update_free_bits_low(
919 /*======================*/
920 const buf_block_t* block, /*!< in: index page */
921 ulint max_ins_size, /*!< in: value of
922 maximum insert size
923 with reorganize before
924 the latest operation
925 performed to the page */
926 mtr_t* mtr) /*!< in/out: mtr */
928 ulint before;
929 ulint after;
931 ut_a(!buf_block_get_page_zip(block));
933 before = ibuf_index_page_calc_free_bits(0, max_ins_size);
935 after = ibuf_index_page_calc_free(0, block);
937 /* This approach cannot be used on compressed pages, since the
938 computed value of "before" often does not match the current
939 state of the bitmap. This is because the free space may
940 increase or decrease when a compressed page is reorganized. */
941 if (before != after) {
942 ibuf_set_free_bits_low(0, block, after, mtr);
946 /**********************************************************************//**
947 Updates the free bits for a compressed page to reflect the present
948 state. Does this in the mtr given, which means that the latching
949 order rules virtually prevent any further operations for this OS
950 thread until mtr is committed. NOTE: The free bits in the insert
951 buffer bitmap must never exceed the free space on a page. It is safe
952 to set the free bits in the same mini-transaction that updated the
953 page. */
954 UNIV_INTERN
955 void
956 ibuf_update_free_bits_zip(
957 /*======================*/
958 buf_block_t* block, /*!< in/out: index page */
959 mtr_t* mtr) /*!< in/out: mtr */
961 page_t* bitmap_page;
962 ulint space;
963 ulint page_no;
964 ulint zip_size;
965 ulint after;
967 space = buf_block_get_space(block);
968 page_no = buf_block_get_page_no(block);
969 zip_size = buf_block_get_zip_size(block);
971 ut_a(page_is_leaf(buf_block_get_frame(block)));
972 ut_a(zip_size);
974 bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
976 after = ibuf_index_page_calc_free_zip(zip_size, block);
978 if (after == 0) {
979 /* We move the page to the front of the buffer pool LRU list:
980 the purpose of this is to prevent those pages to which we
981 cannot make inserts using the insert buffer from slipping
982 out of the buffer pool */
984 buf_page_make_young(&block->page);
987 ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
988 IBUF_BITMAP_FREE, after, mtr);
991 /**********************************************************************//**
992 Updates the free bits for the two pages to reflect the present state.
993 Does this in the mtr given, which means that the latching order rules
994 virtually prevent any further operations until mtr is committed.
995 NOTE: The free bits in the insert buffer bitmap must never exceed the
996 free space on a page. It is safe to set the free bits in the same
997 mini-transaction that updated the pages. */
998 UNIV_INTERN
999 void
1000 ibuf_update_free_bits_for_two_pages_low(
1001 /*====================================*/
1002 ulint zip_size,/*!< in: compressed page size in bytes;
1003 0 for uncompressed pages */
1004 buf_block_t* block1, /*!< in: index page */
1005 buf_block_t* block2, /*!< in: index page */
1006 mtr_t* mtr) /*!< in: mtr */
1008 ulint state;
1010 /* As we have to x-latch two random bitmap pages, we have to acquire
1011 the bitmap mutex to prevent a deadlock with a similar operation
1012 performed by another OS thread. */
1014 mutex_enter(&ibuf_bitmap_mutex);
1016 state = ibuf_index_page_calc_free(zip_size, block1);
1018 ibuf_set_free_bits_low(zip_size, block1, state, mtr);
1020 state = ibuf_index_page_calc_free(zip_size, block2);
1022 ibuf_set_free_bits_low(zip_size, block2, state, mtr);
1024 mutex_exit(&ibuf_bitmap_mutex);
1027 /**********************************************************************//**
1028 Returns TRUE if the page is one of the fixed address ibuf pages.
1029 @return TRUE if a fixed address ibuf i/o page */
1030 UNIV_INLINE
1031 ibool
1032 ibuf_fixed_addr_page(
1033 /*=================*/
1034 ulint space, /*!< in: space id */
1035 ulint zip_size,/*!< in: compressed page size in bytes;
1036 0 for uncompressed pages */
1037 ulint page_no)/*!< in: page number */
1039 return((space == IBUF_SPACE_ID && page_no == IBUF_TREE_ROOT_PAGE_NO)
1040 || ibuf_bitmap_page(zip_size, page_no));
1043 /***********************************************************************//**
1044 Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
1045 Must not be called when recv_no_ibuf_operations==TRUE.
1046 @return TRUE if level 2 or level 3 page */
1047 UNIV_INTERN
1048 ibool
1049 ibuf_page(
1050 /*======*/
1051 ulint space, /*!< in: space id */
1052 ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
1053 ulint page_no,/*!< in: page number */
1054 mtr_t* mtr) /*!< in: mtr which will contain an x-latch to the
1055 bitmap page if the page is not one of the fixed
1056 address ibuf pages, or NULL, in which case a new
1057 transaction is created. */
1059 ibool ret;
1060 mtr_t local_mtr;
1061 page_t* bitmap_page;
1063 ut_ad(!recv_no_ibuf_operations);
1065 if (ibuf_fixed_addr_page(space, zip_size, page_no)) {
1067 return(TRUE);
1068 } else if (space != IBUF_SPACE_ID) {
1070 return(FALSE);
1073 ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TABLESPACE);
1075 if (mtr == NULL) {
1076 mtr = &local_mtr;
1077 mtr_start(mtr);
1080 bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
1082 ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
1083 IBUF_BITMAP_IBUF, mtr);
1085 if (mtr == &local_mtr) {
1086 mtr_commit(mtr);
1089 return(ret);
1092 /********************************************************************//**
1093 Returns the page number field of an ibuf record.
1094 @return page number */
1095 static
1096 ulint
1097 ibuf_rec_get_page_no(
1098 /*=================*/
1099 const rec_t* rec) /*!< in: ibuf record */
1101 const byte* field;
1102 ulint len;
1104 ut_ad(ibuf_inside());
1105 ut_ad(rec_get_n_fields_old(rec) > 2);
1107 field = rec_get_nth_field_old(rec, 1, &len);
1109 if (len == 1) {
1110 /* This is of the >= 4.1.x record format */
1111 ut_a(trx_sys_multiple_tablespace_format);
1113 field = rec_get_nth_field_old(rec, 2, &len);
1114 } else {
1115 ut_a(trx_doublewrite_must_reset_space_ids);
1116 ut_a(!trx_sys_multiple_tablespace_format);
1118 field = rec_get_nth_field_old(rec, 0, &len);
1121 ut_a(len == 4);
1123 return(mach_read_from_4(field));
1126 /********************************************************************//**
1127 Returns the space id field of an ibuf record. For < 4.1.x format records
1128 returns 0.
1129 @return space id */
1130 static
1131 ulint
1132 ibuf_rec_get_space(
1133 /*===============*/
1134 const rec_t* rec) /*!< in: ibuf record */
1136 const byte* field;
1137 ulint len;
1139 ut_ad(ibuf_inside());
1140 ut_ad(rec_get_n_fields_old(rec) > 2);
1142 field = rec_get_nth_field_old(rec, 1, &len);
1144 if (len == 1) {
1145 /* This is of the >= 4.1.x record format */
1147 ut_a(trx_sys_multiple_tablespace_format);
1148 field = rec_get_nth_field_old(rec, 0, &len);
1149 ut_a(len == 4);
1151 return(mach_read_from_4(field));
1154 ut_a(trx_doublewrite_must_reset_space_ids);
1155 ut_a(!trx_sys_multiple_tablespace_format);
1157 return(0);
1160 /********************************************************************//**
1161 Creates a dummy index for inserting a record to a non-clustered index.
1163 @return dummy index */
1164 static
1165 dict_index_t*
1166 ibuf_dummy_index_create(
1167 /*====================*/
1168 ulint n, /*!< in: number of fields */
1169 ibool comp) /*!< in: TRUE=use compact record format */
1171 dict_table_t* table;
1172 dict_index_t* index;
1174 table = dict_mem_table_create("IBUF_DUMMY",
1175 DICT_HDR_SPACE, n,
1176 comp ? DICT_TF_COMPACT : 0);
1178 index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY",
1179 DICT_HDR_SPACE, 0, n);
1181 index->table = table;
1183 /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
1184 index->cached = TRUE;
1186 return(index);
1188 /********************************************************************//**
1189 Add a column to the dummy index */
1190 static
1191 void
1192 ibuf_dummy_index_add_col(
1193 /*=====================*/
1194 dict_index_t* index, /*!< in: dummy index */
1195 const dtype_t* type, /*!< in: the data type of the column */
1196 ulint len) /*!< in: length of the column */
1198 ulint i = index->table->n_def;
1199 dict_mem_table_add_col(index->table, NULL, NULL,
1200 dtype_get_mtype(type),
1201 dtype_get_prtype(type),
1202 dtype_get_len(type));
1203 dict_index_add_col(index, index->table,
1204 dict_table_get_nth_col(index->table, i), len);
1206 /********************************************************************//**
1207 Deallocates a dummy index for inserting a record to a non-clustered index. */
1208 static
1209 void
1210 ibuf_dummy_index_free(
1211 /*==================*/
1212 dict_index_t* index) /*!< in, own: dummy index */
1214 dict_table_t* table = index->table;
1216 dict_mem_index_free(index);
1217 dict_mem_table_free(table);
1220 /*********************************************************************//**
1221 Builds the entry to insert into a non-clustered index when we have the
1222 corresponding record in an ibuf index.
1224 NOTE that as we copy pointers to fields in ibuf_rec, the caller must
1225 hold a latch to the ibuf_rec page as long as the entry is used!
1227 @return own: entry to insert to a non-clustered index */
1228 UNIV_INLINE
1229 dtuple_t*
1230 ibuf_build_entry_pre_4_1_x(
1231 /*=======================*/
1232 const rec_t* ibuf_rec, /*!< in: record in an insert buffer */
1233 mem_heap_t* heap, /*!< in: heap where built */
1234 dict_index_t** pindex) /*!< out, own: dummy index that
1235 describes the entry */
1237 ulint i;
1238 ulint len;
1239 const byte* types;
1240 dtuple_t* tuple;
1241 ulint n_fields;
1243 ut_a(trx_doublewrite_must_reset_space_ids);
1244 ut_a(!trx_sys_multiple_tablespace_format);
1246 n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
1247 tuple = dtuple_create(heap, n_fields);
1248 types = rec_get_nth_field_old(ibuf_rec, 1, &len);
1250 ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1252 for (i = 0; i < n_fields; i++) {
1253 const byte* data;
1254 dfield_t* field;
1256 field = dtuple_get_nth_field(tuple, i);
1258 data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
1260 dfield_set_data(field, data, len);
1262 dtype_read_for_order_and_null_size(
1263 dfield_get_type(field),
1264 types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1267 *pindex = ibuf_dummy_index_create(n_fields, FALSE);
1269 return(tuple);
1272 /*********************************************************************//**
1273 Builds the entry to insert into a non-clustered index when we have the
1274 corresponding record in an ibuf index.
1276 NOTE that as we copy pointers to fields in ibuf_rec, the caller must
1277 hold a latch to the ibuf_rec page as long as the entry is used!
1279 @return own: entry to insert to a non-clustered index */
1280 static
1281 dtuple_t*
1282 ibuf_build_entry_from_ibuf_rec(
1283 /*===========================*/
1284 const rec_t* ibuf_rec, /*!< in: record in an insert buffer */
1285 mem_heap_t* heap, /*!< in: heap where built */
1286 dict_index_t** pindex) /*!< out, own: dummy index that
1287 describes the entry */
1289 dtuple_t* tuple;
1290 dfield_t* field;
1291 ulint n_fields;
1292 const byte* types;
1293 const byte* data;
1294 ulint len;
1295 ulint i;
1296 dict_index_t* index;
1298 data = rec_get_nth_field_old(ibuf_rec, 1, &len);
1300 if (len > 1) {
1301 /* This a < 4.1.x format record */
1303 return(ibuf_build_entry_pre_4_1_x(ibuf_rec, heap, pindex));
1306 /* This a >= 4.1.x format record */
1308 ut_a(trx_sys_multiple_tablespace_format);
1309 ut_a(*data == 0);
1310 ut_a(rec_get_n_fields_old(ibuf_rec) > 4);
1312 n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
1314 tuple = dtuple_create(heap, n_fields);
1316 types = rec_get_nth_field_old(ibuf_rec, 3, &len);
1318 ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1);
1319 index = ibuf_dummy_index_create(
1320 n_fields, len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1322 if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
1323 /* compact record format */
1324 len--;
1325 ut_a(*types == 0);
1326 types++;
1329 ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1331 for (i = 0; i < n_fields; i++) {
1332 field = dtuple_get_nth_field(tuple, i);
1334 data = rec_get_nth_field_old(ibuf_rec, i + 4, &len);
1336 dfield_set_data(field, data, len);
1338 dtype_new_read_for_order_and_null_size(
1339 dfield_get_type(field),
1340 types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1342 ibuf_dummy_index_add_col(index, dfield_get_type(field), len);
1345 /* Prevent an ut_ad() failure in page_zip_write_rec() by
1346 adding system columns to the dummy table pointed to by the
1347 dummy secondary index. The insert buffer is only used for
1348 secondary indexes, whose records never contain any system
1349 columns, such as DB_TRX_ID. */
1350 ut_d(dict_table_add_system_columns(index->table, index->table->heap));
1352 *pindex = index;
1354 return(tuple);
1357 /********************************************************************//**
1358 Returns the space taken by a stored non-clustered index entry if converted to
1359 an index record.
1360 @return size of index record in bytes + an upper limit of the space
1361 taken in the page directory */
1362 static
1363 ulint
1364 ibuf_rec_get_volume(
1365 /*================*/
1366 const rec_t* ibuf_rec)/*!< in: ibuf record */
1368 dtype_t dtype;
1369 ibool new_format = FALSE;
1370 ulint data_size = 0;
1371 ulint n_fields;
1372 const byte* types;
1373 const byte* data;
1374 ulint len;
1375 ulint i;
1376 ulint comp;
1378 ut_ad(ibuf_inside());
1379 ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
1381 data = rec_get_nth_field_old(ibuf_rec, 1, &len);
1383 if (len > 1) {
1384 /* < 4.1.x format record */
1386 ut_a(trx_doublewrite_must_reset_space_ids);
1387 ut_a(!trx_sys_multiple_tablespace_format);
1389 n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
1391 types = rec_get_nth_field_old(ibuf_rec, 1, &len);
1393 ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1394 comp = 0;
1395 } else {
1396 /* >= 4.1.x format record */
1398 ut_a(trx_sys_multiple_tablespace_format);
1399 ut_a(*data == 0);
1401 types = rec_get_nth_field_old(ibuf_rec, 3, &len);
1403 comp = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
1405 ut_a(comp <= 1);
1406 if (comp) {
1407 /* compact record format */
1408 ulint volume;
1409 dict_index_t* dummy_index;
1410 mem_heap_t* heap = mem_heap_create(500);
1411 dtuple_t* entry = ibuf_build_entry_from_ibuf_rec(
1412 ibuf_rec, heap, &dummy_index);
1413 volume = rec_get_converted_size(dummy_index, entry, 0);
1414 ibuf_dummy_index_free(dummy_index);
1415 mem_heap_free(heap);
1416 return(volume + page_dir_calc_reserved_space(1));
1419 n_fields = rec_get_n_fields_old(ibuf_rec) - 4;
1421 new_format = TRUE;
1424 for (i = 0; i < n_fields; i++) {
1425 if (new_format) {
1426 data = rec_get_nth_field_old(ibuf_rec, i + 4, &len);
1428 dtype_new_read_for_order_and_null_size(
1429 &dtype, types + i
1430 * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
1431 } else {
1432 data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
1434 dtype_read_for_order_and_null_size(
1435 &dtype, types + i
1436 * DATA_ORDER_NULL_TYPE_BUF_SIZE);
1439 if (len == UNIV_SQL_NULL) {
1440 data_size += dtype_get_sql_null_size(&dtype, comp);
1441 } else {
1442 data_size += len;
1446 return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
1447 + page_dir_calc_reserved_space(1));
1450 /*********************************************************************//**
1451 Builds the tuple to insert to an ibuf tree when we have an entry for a
1452 non-clustered index.
1454 NOTE that the original entry must be kept because we copy pointers to
1455 its fields.
1457 @return own: entry to insert into an ibuf index tree */
1458 static
1459 dtuple_t*
1460 ibuf_entry_build(
1461 /*=============*/
1462 dict_index_t* index, /*!< in: non-clustered index */
1463 const dtuple_t* entry, /*!< in: entry for a non-clustered index */
1464 ulint space, /*!< in: space id */
1465 ulint page_no,/*!< in: index page number where entry should
1466 be inserted */
1467 mem_heap_t* heap) /*!< in: heap into which to build */
1469 dtuple_t* tuple;
1470 dfield_t* field;
1471 const dfield_t* entry_field;
1472 ulint n_fields;
1473 byte* buf;
1474 byte* buf2;
1475 ulint i;
1477 /* Starting from 4.1.x, we have to build a tuple whose
1478 (1) first field is the space id,
1479 (2) the second field a single marker byte (0) to tell that this
1480 is a new format record,
1481 (3) the third contains the page number, and
1482 (4) the fourth contains the relevent type information of each data
1483 field; the length of this field % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE is
1484 (a) 0 for b-trees in the old format, and
1485 (b) 1 for b-trees in the compact format, the first byte of the field
1486 being the marker (0);
1487 (5) and the rest of the fields are copied from entry. All fields
1488 in the tuple are ordered like the type binary in our insert buffer
1489 tree. */
1491 n_fields = dtuple_get_n_fields(entry);
1493 tuple = dtuple_create(heap, n_fields + 4);
1495 /* Store the space id in tuple */
1497 field = dtuple_get_nth_field(tuple, 0);
1499 buf = mem_heap_alloc(heap, 4);
1501 mach_write_to_4(buf, space);
1503 dfield_set_data(field, buf, 4);
1505 /* Store the marker byte field in tuple */
1507 field = dtuple_get_nth_field(tuple, 1);
1509 buf = mem_heap_alloc(heap, 1);
1511 /* We set the marker byte zero */
1513 mach_write_to_1(buf, 0);
1515 dfield_set_data(field, buf, 1);
1517 /* Store the page number in tuple */
1519 field = dtuple_get_nth_field(tuple, 2);
1521 buf = mem_heap_alloc(heap, 4);
1523 mach_write_to_4(buf, page_no);
1525 dfield_set_data(field, buf, 4);
1527 /* Store the type info in buf2, and add the fields from entry to
1528 tuple */
1529 buf2 = mem_heap_alloc(heap, n_fields
1530 * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
1531 + dict_table_is_comp(index->table));
1532 if (dict_table_is_comp(index->table)) {
1533 *buf2++ = 0; /* write the compact format indicator */
1535 for (i = 0; i < n_fields; i++) {
1536 ulint fixed_len;
1537 const dict_field_t* ifield;
1539 /* We add 4 below because we have the 4 extra fields at the
1540 start of an ibuf record */
1542 field = dtuple_get_nth_field(tuple, i + 4);
1543 entry_field = dtuple_get_nth_field(entry, i);
1544 dfield_copy(field, entry_field);
1546 ifield = dict_index_get_nth_field(index, i);
1547 /* Prefix index columns of fixed-length columns are of
1548 fixed length. However, in the function call below,
1549 dfield_get_type(entry_field) contains the fixed length
1550 of the column in the clustered index. Replace it with
1551 the fixed length of the secondary index column. */
1552 fixed_len = ifield->fixed_len;
1554 #ifdef UNIV_DEBUG
1555 if (fixed_len) {
1556 /* dict_index_add_col() should guarantee these */
1557 ut_ad(fixed_len <= (ulint)
1558 dfield_get_type(entry_field)->len);
1559 if (ifield->prefix_len) {
1560 ut_ad(ifield->prefix_len == fixed_len);
1561 } else {
1562 ut_ad(fixed_len == (ulint)
1563 dfield_get_type(entry_field)->len);
1566 #endif /* UNIV_DEBUG */
1568 dtype_new_store_for_order_and_null_size(
1569 buf2 + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
1570 dfield_get_type(entry_field), fixed_len);
1573 /* Store the type info in buf2 to field 3 of tuple */
1575 field = dtuple_get_nth_field(tuple, 3);
1577 if (dict_table_is_comp(index->table)) {
1578 buf2--;
1581 dfield_set_data(field, buf2, n_fields
1582 * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
1583 + dict_table_is_comp(index->table));
1584 /* Set all the types in the new tuple binary */
1586 dtuple_set_types_binary(tuple, n_fields + 4);
1588 return(tuple);
1591 /*********************************************************************//**
1592 Builds a search tuple used to search buffered inserts for an index page.
1593 This is for < 4.1.x format records
1594 @return own: search tuple */
1595 static
1596 dtuple_t*
1597 ibuf_search_tuple_build(
1598 /*====================*/
1599 ulint space, /*!< in: space id */
1600 ulint page_no,/*!< in: index page number */
1601 mem_heap_t* heap) /*!< in: heap into which to build */
1603 dtuple_t* tuple;
1604 dfield_t* field;
1605 byte* buf;
1607 ut_a(space == 0);
1608 ut_a(trx_doublewrite_must_reset_space_ids);
1609 ut_a(!trx_sys_multiple_tablespace_format);
1611 tuple = dtuple_create(heap, 1);
1613 /* Store the page number in tuple */
1615 field = dtuple_get_nth_field(tuple, 0);
1617 buf = mem_heap_alloc(heap, 4);
1619 mach_write_to_4(buf, page_no);
1621 dfield_set_data(field, buf, 4);
1623 dtuple_set_types_binary(tuple, 1);
1625 return(tuple);
1628 /*********************************************************************//**
1629 Builds a search tuple used to search buffered inserts for an index page.
1630 This is for >= 4.1.x format records.
1631 @return own: search tuple */
1632 static
1633 dtuple_t*
1634 ibuf_new_search_tuple_build(
1635 /*========================*/
1636 ulint space, /*!< in: space id */
1637 ulint page_no,/*!< in: index page number */
1638 mem_heap_t* heap) /*!< in: heap into which to build */
1640 dtuple_t* tuple;
1641 dfield_t* field;
1642 byte* buf;
1644 ut_a(trx_sys_multiple_tablespace_format);
1646 tuple = dtuple_create(heap, 3);
1648 /* Store the space id in tuple */
1650 field = dtuple_get_nth_field(tuple, 0);
1652 buf = mem_heap_alloc(heap, 4);
1654 mach_write_to_4(buf, space);
1656 dfield_set_data(field, buf, 4);
1658 /* Store the new format record marker byte */
1660 field = dtuple_get_nth_field(tuple, 1);
1662 buf = mem_heap_alloc(heap, 1);
1664 mach_write_to_1(buf, 0);
1666 dfield_set_data(field, buf, 1);
1668 /* Store the page number in tuple */
1670 field = dtuple_get_nth_field(tuple, 2);
1672 buf = mem_heap_alloc(heap, 4);
1674 mach_write_to_4(buf, page_no);
1676 dfield_set_data(field, buf, 4);
1678 dtuple_set_types_binary(tuple, 3);
1680 return(tuple);
1683 /*********************************************************************//**
1684 Checks if there are enough pages in the free list of the ibuf tree that we
1685 dare to start a pessimistic insert to the insert buffer.
1686 @return TRUE if enough free pages in list */
1687 UNIV_INLINE
1688 ibool
1689 ibuf_data_enough_free_for_insert(void)
1690 /*==================================*/
1692 ut_ad(mutex_own(&ibuf_mutex));
1694 /* We want a big margin of free pages, because a B-tree can sometimes
1695 grow in size also if records are deleted from it, as the node pointers
1696 can change, and we must make sure that we are able to delete the
1697 inserts buffered for pages that we read to the buffer pool, without
1698 any risk of running out of free space in the insert buffer. */
1700 return(ibuf->free_list_len >= (ibuf->size / 2) + 3 * ibuf->height);
1703 /*********************************************************************//**
1704 Checks if there are enough pages in the free list of the ibuf tree that we
1705 should remove them and free to the file space management.
1706 @return TRUE if enough free pages in list */
1707 UNIV_INLINE
1708 ibool
1709 ibuf_data_too_much_free(void)
1710 /*=========================*/
1712 ut_ad(mutex_own(&ibuf_mutex));
1714 return(ibuf->free_list_len >= 3 + (ibuf->size / 2) + 3 * ibuf->height);
1717 /*********************************************************************//**
1718 Allocates a new page from the ibuf file segment and adds it to the free
1719 list.
1720 @return DB_SUCCESS, or DB_STRONG_FAIL if no space left */
1721 static
1722 ulint
1723 ibuf_add_free_page(void)
1724 /*====================*/
1726 mtr_t mtr;
1727 page_t* header_page;
1728 ulint flags;
1729 ulint zip_size;
1730 buf_block_t* block;
1731 page_t* page;
1732 page_t* root;
1733 page_t* bitmap_page;
1735 mtr_start(&mtr);
1737 /* Acquire the fsp latch before the ibuf header, obeying the latching
1738 order */
1739 mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
1740 zip_size = dict_table_flags_to_zip_size(flags);
1742 header_page = ibuf_header_page_get(&mtr);
1744 /* Allocate a new page: NOTE that if the page has been a part of a
1745 non-clustered index which has subsequently been dropped, then the
1746 page may have buffered inserts in the insert buffer, and these
1747 should be deleted from there. These get deleted when the page
1748 allocation creates the page in buffer. Thus the call below may end
1749 up calling the insert buffer routines and, as we yet have no latches
1750 to insert buffer tree pages, these routines can run without a risk
1751 of a deadlock. This is the reason why we created a special ibuf
1752 header page apart from the ibuf tree. */
1754 block = fseg_alloc_free_page(
1755 header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
1756 &mtr);
1758 if (block == NULL) {
1759 mtr_commit(&mtr);
1761 return(DB_STRONG_FAIL);
1764 ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1);
1765 ibuf_enter();
1766 mutex_enter(&ibuf_mutex);
1767 root = ibuf_tree_root_get(&mtr);
1769 buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW);
1770 page = buf_block_get_frame(block);
1772 /* Add the page to the free list and update the ibuf size data */
1774 flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1775 page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
1777 mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_IBUF_FREE_LIST,
1778 MLOG_2BYTES, &mtr);
1780 ibuf->seg_size++;
1781 ibuf->free_list_len++;
1783 /* Set the bit indicating that this page is now an ibuf tree page
1784 (level 2 page) */
1786 bitmap_page = ibuf_bitmap_get_map_page(
1787 IBUF_SPACE_ID, buf_block_get_page_no(block), zip_size, &mtr);
1789 ibuf_bitmap_page_set_bits(
1790 bitmap_page, buf_block_get_page_no(block), zip_size,
1791 IBUF_BITMAP_IBUF, TRUE, &mtr);
1793 mtr_commit(&mtr);
1795 mutex_exit(&ibuf_mutex);
1797 ibuf_exit();
1799 return(DB_SUCCESS);
1802 /*********************************************************************//**
1803 Removes a page from the free list and frees it to the fsp system. */
1804 static
1805 void
1806 ibuf_remove_free_page(void)
1807 /*=======================*/
1809 mtr_t mtr;
1810 mtr_t mtr2;
1811 page_t* header_page;
1812 ulint flags;
1813 ulint zip_size;
1814 ulint page_no;
1815 page_t* page;
1816 page_t* root;
1817 page_t* bitmap_page;
1819 mtr_start(&mtr);
1821 /* Acquire the fsp latch before the ibuf header, obeying the latching
1822 order */
1823 mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
1824 zip_size = dict_table_flags_to_zip_size(flags);
1826 header_page = ibuf_header_page_get(&mtr);
1828 /* Prevent pessimistic inserts to insert buffer trees for a while */
1829 mutex_enter(&ibuf_pessimistic_insert_mutex);
1831 ibuf_enter();
1833 mutex_enter(&ibuf_mutex);
1835 if (!ibuf_data_too_much_free()) {
1837 mutex_exit(&ibuf_mutex);
1839 ibuf_exit();
1841 mutex_exit(&ibuf_pessimistic_insert_mutex);
1843 mtr_commit(&mtr);
1845 return;
1848 mtr_start(&mtr2);
1850 root = ibuf_tree_root_get(&mtr2);
1852 page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1853 &mtr2).page;
1855 /* NOTE that we must release the latch on the ibuf tree root
1856 because in fseg_free_page we access level 1 pages, and the root
1857 is a level 2 page. */
1859 mtr_commit(&mtr2);
1860 mutex_exit(&ibuf_mutex);
1862 ibuf_exit();
1864 /* Since pessimistic inserts were prevented, we know that the
1865 page is still in the free list. NOTE that also deletes may take
1866 pages from the free list, but they take them from the start, and
1867 the free list was so long that they cannot have taken the last
1868 page from it. */
1870 fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
1871 IBUF_SPACE_ID, page_no, &mtr);
1873 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
1874 buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no);
1875 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
1877 ibuf_enter();
1879 mutex_enter(&ibuf_mutex);
1881 root = ibuf_tree_root_get(&mtr);
1883 ut_ad(page_no == flst_get_last(root + PAGE_HEADER
1884 + PAGE_BTR_IBUF_FREE_LIST, &mtr).page);
1887 buf_block_t* block;
1889 block = buf_page_get(
1890 IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
1892 buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
1894 page = buf_block_get_frame(block);
1897 /* Remove the page from the free list and update the ibuf size data */
1899 flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
1900 page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
1902 ibuf->seg_size--;
1903 ibuf->free_list_len--;
1905 mutex_exit(&ibuf_pessimistic_insert_mutex);
1907 /* Set the bit indicating that this page is no more an ibuf tree page
1908 (level 2 page) */
1910 bitmap_page = ibuf_bitmap_get_map_page(
1911 IBUF_SPACE_ID, page_no, zip_size, &mtr);
1913 ibuf_bitmap_page_set_bits(
1914 bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr);
1916 #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
1917 buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no);
1918 #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
1919 mtr_commit(&mtr);
1921 mutex_exit(&ibuf_mutex);
1923 ibuf_exit();
1926 /***********************************************************************//**
1927 Frees excess pages from the ibuf free list. This function is called when an OS
1928 thread calls fsp services to allocate a new file segment, or a new page to a
1929 file segment, and the thread did not own the fsp latch before this call. */
1930 UNIV_INTERN
1931 void
1932 ibuf_free_excess_pages(void)
1933 /*========================*/
1935 ulint i;
1937 #ifdef UNIV_SYNC_DEBUG
1938 ut_ad(rw_lock_own(fil_space_get_latch(IBUF_SPACE_ID, NULL),
1939 RW_LOCK_EX));
1940 #endif /* UNIV_SYNC_DEBUG */
1942 ut_ad(rw_lock_get_x_lock_count(
1943 fil_space_get_latch(IBUF_SPACE_ID, NULL)) == 1);
1945 ut_ad(!ibuf_inside());
1947 /* NOTE: We require that the thread did not own the latch before,
1948 because then we know that we can obey the correct latching order
1949 for ibuf latches */
1951 if (!ibuf) {
1952 /* Not yet initialized; not sure if this is possible, but
1953 does no harm to check for it. */
1955 return;
1958 /* Free at most a few pages at a time, so that we do not delay the
1959 requested service too much */
1961 for (i = 0; i < 4; i++) {
1963 mutex_enter(&ibuf_mutex);
1965 if (!ibuf_data_too_much_free()) {
1967 mutex_exit(&ibuf_mutex);
1969 return;
1972 mutex_exit(&ibuf_mutex);
1974 ibuf_remove_free_page();
1978 /*********************************************************************//**
1979 Reads page numbers from a leaf in an ibuf tree.
1980 @return a lower limit for the combined volume of records which will be
1981 merged */
1982 static
1983 ulint
1984 ibuf_get_merge_page_nos(
1985 /*====================*/
1986 ibool contract,/*!< in: TRUE if this function is called to
1987 contract the tree, FALSE if this is called
1988 when a single page becomes full and we look
1989 if it pays to read also nearby pages */
1990 rec_t* rec, /*!< in: record from which we read up and down
1991 in the chain of records */
1992 ulint* space_ids,/*!< in/out: space id's of the pages */
1993 ib_int64_t* space_versions,/*!< in/out: tablespace version
1994 timestamps; used to prevent reading in old
1995 pages after DISCARD + IMPORT tablespace */
1996 ulint* page_nos,/*!< in/out: buffer for at least
1997 IBUF_MAX_N_PAGES_MERGED many page numbers;
1998 the page numbers are in an ascending order */
1999 ulint* n_stored)/*!< out: number of page numbers stored to
2000 page_nos in this function */
2002 ulint prev_page_no;
2003 ulint prev_space_id;
2004 ulint first_page_no;
2005 ulint first_space_id;
2006 ulint rec_page_no;
2007 ulint rec_space_id;
2008 ulint sum_volumes;
2009 ulint volume_for_page;
2010 ulint rec_volume;
2011 ulint limit;
2012 ulint n_pages;
2014 *n_stored = 0;
2016 limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool->curr_size / 4);
2018 if (page_rec_is_supremum(rec)) {
2020 rec = page_rec_get_prev(rec);
2023 if (page_rec_is_infimum(rec)) {
2025 rec = page_rec_get_next(rec);
2028 if (page_rec_is_supremum(rec)) {
2030 return(0);
2033 first_page_no = ibuf_rec_get_page_no(rec);
2034 first_space_id = ibuf_rec_get_space(rec);
2035 n_pages = 0;
2036 prev_page_no = 0;
2037 prev_space_id = 0;
2039 /* Go backwards from the first rec until we reach the border of the
2040 'merge area', or the page start or the limit of storeable pages is
2041 reached */
2043 while (!page_rec_is_infimum(rec) && UNIV_LIKELY(n_pages < limit)) {
2045 rec_page_no = ibuf_rec_get_page_no(rec);
2046 rec_space_id = ibuf_rec_get_space(rec);
2048 if (rec_space_id != first_space_id
2049 || (rec_page_no / IBUF_MERGE_AREA)
2050 != (first_page_no / IBUF_MERGE_AREA)) {
2052 break;
2055 if (rec_page_no != prev_page_no
2056 || rec_space_id != prev_space_id) {
2057 n_pages++;
2060 prev_page_no = rec_page_no;
2061 prev_space_id = rec_space_id;
2063 rec = page_rec_get_prev(rec);
2066 rec = page_rec_get_next(rec);
2068 /* At the loop start there is no prev page; we mark this with a pair
2069 of space id, page no (0, 0) for which there can never be entries in
2070 the insert buffer */
2072 prev_page_no = 0;
2073 prev_space_id = 0;
2074 sum_volumes = 0;
2075 volume_for_page = 0;
2077 while (*n_stored < limit) {
2078 if (page_rec_is_supremum(rec)) {
2079 /* When no more records available, mark this with
2080 another 'impossible' pair of space id, page no */
2081 rec_page_no = 1;
2082 rec_space_id = 0;
2083 } else {
2084 rec_page_no = ibuf_rec_get_page_no(rec);
2085 rec_space_id = ibuf_rec_get_space(rec);
2086 /* In the system tablespace, the smallest
2087 possible secondary index leaf page number is
2088 bigger than IBUF_TREE_ROOT_PAGE_NO (4). In
2089 other tablespaces, the clustered index tree is
2090 created at page 3, which makes page 4 the
2091 smallest possible secondary index leaf page
2092 (and that only after DROP INDEX). */
2093 ut_ad(rec_page_no
2094 > IBUF_TREE_ROOT_PAGE_NO - (rec_space_id != 0));
2097 #ifdef UNIV_IBUF_DEBUG
2098 ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED);
2099 #endif
2100 if ((rec_space_id != prev_space_id
2101 || rec_page_no != prev_page_no)
2102 && (prev_space_id != 0 || prev_page_no != 0)) {
2104 if ((prev_page_no == first_page_no
2105 && prev_space_id == first_space_id)
2106 || contract
2107 || (volume_for_page
2108 > ((IBUF_MERGE_THRESHOLD - 1)
2109 * 4 * UNIV_PAGE_SIZE
2110 / IBUF_PAGE_SIZE_PER_FREE_SPACE)
2111 / IBUF_MERGE_THRESHOLD)) {
2113 space_ids[*n_stored] = prev_space_id;
2114 space_versions[*n_stored]
2115 = fil_space_get_version(prev_space_id);
2116 page_nos[*n_stored] = prev_page_no;
2118 (*n_stored)++;
2120 sum_volumes += volume_for_page;
2123 if (rec_space_id != first_space_id
2124 || rec_page_no / IBUF_MERGE_AREA
2125 != first_page_no / IBUF_MERGE_AREA) {
2127 break;
2130 volume_for_page = 0;
2133 if (rec_page_no == 1 && rec_space_id == 0) {
2134 /* Supremum record */
2136 break;
2139 rec_volume = ibuf_rec_get_volume(rec);
2141 volume_for_page += rec_volume;
2143 prev_page_no = rec_page_no;
2144 prev_space_id = rec_space_id;
2146 rec = page_rec_get_next(rec);
2149 #ifdef UNIV_IBUF_DEBUG
2150 ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED);
2151 #endif
2152 #if 0
2153 fprintf(stderr, "Ibuf merge batch %lu pages %lu volume\n",
2154 *n_stored, sum_volumes);
2155 #endif
2156 return(sum_volumes);
2159 /*********************************************************************//**
2160 Contracts insert buffer trees by reading pages to the buffer pool.
2161 @return a lower limit for the combined size in bytes of entries which
2162 will be merged from ibuf trees to the pages read, 0 if ibuf is
2163 empty */
2164 static
2165 ulint
2166 ibuf_contract_ext(
2167 /*==============*/
2168 ulint* n_pages,/*!< out: number of pages to which merged */
2169 ibool sync) /*!< in: TRUE if the caller wants to wait for the
2170 issued read with the highest tablespace address
2171 to complete */
2173 btr_pcur_t pcur;
2174 ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
2175 ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
2176 ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
2177 ulint n_stored;
2178 ulint sum_sizes;
2179 mtr_t mtr;
2181 *n_pages = 0;
2182 ut_ad(!ibuf_inside());
2184 mutex_enter(&ibuf_mutex);
2186 if (ibuf->empty) {
2187 ibuf_is_empty:
2188 mutex_exit(&ibuf_mutex);
2190 return(0);
2193 mtr_start(&mtr);
2195 ibuf_enter();
2197 /* Open a cursor to a randomly chosen leaf of the tree, at a random
2198 position within the leaf */
2200 btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr);
2202 if (page_get_n_recs(btr_pcur_get_page(&pcur)) == 0) {
2203 /* When the ibuf tree is emptied completely, the last record
2204 is removed using an optimistic delete and ibuf_size_update
2205 is not called, causing ibuf->empty to remain FALSE. If we do
2206 not reset it to TRUE here then database shutdown will hang
2207 in the loop in ibuf_contract_for_n_pages. */
2209 ibuf->empty = TRUE;
2211 ibuf_exit();
2213 mtr_commit(&mtr);
2214 btr_pcur_close(&pcur);
2216 goto ibuf_is_empty;
2219 mutex_exit(&ibuf_mutex);
2221 sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur),
2222 space_ids, space_versions,
2223 page_nos, &n_stored);
2224 #if 0 /* defined UNIV_IBUF_DEBUG */
2225 fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n",
2226 sync, n_stored, sum_sizes);
2227 #endif
2228 ibuf_exit();
2230 mtr_commit(&mtr);
2231 btr_pcur_close(&pcur);
2233 buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos,
2234 n_stored);
2235 *n_pages = n_stored;
2237 return(sum_sizes + 1);
2240 /*********************************************************************//**
2241 Contracts insert buffer trees by reading pages to the buffer pool.
2242 @return a lower limit for the combined size in bytes of entries which
2243 will be merged from ibuf trees to the pages read, 0 if ibuf is
2244 empty */
2245 UNIV_INTERN
2246 ulint
2247 ibuf_contract(
2248 /*==========*/
2249 ibool sync) /*!< in: TRUE if the caller wants to wait for the
2250 issued read with the highest tablespace address
2251 to complete */
2253 ulint n_pages;
2255 return(ibuf_contract_ext(&n_pages, sync));
2258 /*********************************************************************//**
2259 Contracts insert buffer trees by reading pages to the buffer pool.
2260 @return a lower limit for the combined size in bytes of entries which
2261 will be merged from ibuf trees to the pages read, 0 if ibuf is
2262 empty */
2263 UNIV_INTERN
2264 ulint
2265 ibuf_contract_for_n_pages(
2266 /*======================*/
2267 ibool sync, /*!< in: TRUE if the caller wants to wait for the
2268 issued read with the highest tablespace address
2269 to complete */
2270 ulint n_pages)/*!< in: try to read at least this many pages to
2271 the buffer pool and merge the ibuf contents to
2272 them */
2274 ulint sum_bytes = 0;
2275 ulint sum_pages = 0;
2276 ulint n_bytes;
2277 ulint n_pag2;
2279 while (sum_pages < n_pages) {
2280 n_bytes = ibuf_contract_ext(&n_pag2, sync);
2282 if (n_bytes == 0) {
2283 return(sum_bytes);
2286 sum_bytes += n_bytes;
2287 sum_pages += n_pag2;
2290 return(sum_bytes);
2293 /*********************************************************************//**
2294 Contract insert buffer trees after insert if they are too big. */
2295 UNIV_INLINE
2296 void
2297 ibuf_contract_after_insert(
2298 /*=======================*/
2299 ulint entry_size) /*!< in: size of a record which was inserted
2300 into an ibuf tree */
2302 ibool sync;
2303 ulint sum_sizes;
2304 ulint size;
2306 mutex_enter(&ibuf_mutex);
2308 if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
2309 mutex_exit(&ibuf_mutex);
2311 return;
2314 sync = FALSE;
2316 if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) {
2318 sync = TRUE;
2321 mutex_exit(&ibuf_mutex);
2323 /* Contract at least entry_size many bytes */
2324 sum_sizes = 0;
2325 size = 1;
2327 while ((size > 0) && (sum_sizes < entry_size)) {
2329 size = ibuf_contract(sync);
2330 sum_sizes += size;
2334 /*********************************************************************//**
2335 Gets an upper limit for the combined size of entries buffered in the insert
2336 buffer for a given page.
2337 @return upper limit for the volume of buffered inserts for the index
2338 page, in bytes; UNIV_PAGE_SIZE, if the entries for the index page span
2339 several pages in the insert buffer */
2340 static
2341 ulint
2342 ibuf_get_volume_buffered(
2343 /*=====================*/
2344 btr_pcur_t* pcur, /*!< in: pcur positioned at a place in an
2345 insert buffer tree where we would insert an
2346 entry for the index page whose number is
2347 page_no, latch mode has to be BTR_MODIFY_PREV
2348 or BTR_MODIFY_TREE */
2349 ulint space, /*!< in: space id */
2350 ulint page_no,/*!< in: page number of an index page */
2351 mtr_t* mtr) /*!< in: mtr */
2353 ulint volume;
2354 rec_t* rec;
2355 page_t* page;
2356 ulint prev_page_no;
2357 page_t* prev_page;
2358 ulint next_page_no;
2359 page_t* next_page;
2361 ut_a(trx_sys_multiple_tablespace_format);
2363 ut_ad((pcur->latch_mode == BTR_MODIFY_PREV)
2364 || (pcur->latch_mode == BTR_MODIFY_TREE));
2366 /* Count the volume of records earlier in the alphabetical order than
2367 pcur */
2369 volume = 0;
2371 rec = btr_pcur_get_rec(pcur);
2372 page = page_align(rec);
2374 if (page_rec_is_supremum(rec)) {
2375 rec = page_rec_get_prev(rec);
2378 for (;;) {
2379 if (page_rec_is_infimum(rec)) {
2381 break;
2384 if (page_no != ibuf_rec_get_page_no(rec)
2385 || space != ibuf_rec_get_space(rec)) {
2387 goto count_later;
2390 volume += ibuf_rec_get_volume(rec);
2392 rec = page_rec_get_prev(rec);
2395 /* Look at the previous page */
2397 prev_page_no = btr_page_get_prev(page, mtr);
2399 if (prev_page_no == FIL_NULL) {
2401 goto count_later;
2405 buf_block_t* block;
2407 block = buf_page_get(
2408 IBUF_SPACE_ID, 0, prev_page_no, RW_X_LATCH, mtr);
2410 buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
2413 prev_page = buf_block_get_frame(block);
2416 #ifdef UNIV_BTR_DEBUG
2417 ut_a(btr_page_get_next(prev_page, mtr)
2418 == page_get_page_no(page));
2419 #endif /* UNIV_BTR_DEBUG */
2421 rec = page_get_supremum_rec(prev_page);
2422 rec = page_rec_get_prev(rec);
2424 for (;;) {
2425 if (page_rec_is_infimum(rec)) {
2427 /* We cannot go to yet a previous page, because we
2428 do not have the x-latch on it, and cannot acquire one
2429 because of the latching order: we have to give up */
2431 return(UNIV_PAGE_SIZE);
2434 if (page_no != ibuf_rec_get_page_no(rec)
2435 || space != ibuf_rec_get_space(rec)) {
2437 goto count_later;
2440 volume += ibuf_rec_get_volume(rec);
2442 rec = page_rec_get_prev(rec);
2445 count_later:
2446 rec = btr_pcur_get_rec(pcur);
2448 if (!page_rec_is_supremum(rec)) {
2449 rec = page_rec_get_next(rec);
2452 for (;;) {
2453 if (page_rec_is_supremum(rec)) {
2455 break;
2458 if (page_no != ibuf_rec_get_page_no(rec)
2459 || space != ibuf_rec_get_space(rec)) {
2461 return(volume);
2464 volume += ibuf_rec_get_volume(rec);
2466 rec = page_rec_get_next(rec);
2469 /* Look at the next page */
2471 next_page_no = btr_page_get_next(page, mtr);
2473 if (next_page_no == FIL_NULL) {
2475 return(volume);
2479 buf_block_t* block;
2481 block = buf_page_get(
2482 IBUF_SPACE_ID, 0, next_page_no, RW_X_LATCH, mtr);
2484 buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
2487 next_page = buf_block_get_frame(block);
2490 #ifdef UNIV_BTR_DEBUG
2491 ut_a(btr_page_get_prev(next_page, mtr) == page_get_page_no(page));
2492 #endif /* UNIV_BTR_DEBUG */
2494 rec = page_get_infimum_rec(next_page);
2495 rec = page_rec_get_next(rec);
2497 for (;;) {
2498 if (page_rec_is_supremum(rec)) {
2500 /* We give up */
2502 return(UNIV_PAGE_SIZE);
2505 if (page_no != ibuf_rec_get_page_no(rec)
2506 || space != ibuf_rec_get_space(rec)) {
2508 return(volume);
2511 volume += ibuf_rec_get_volume(rec);
2513 rec = page_rec_get_next(rec);
2517 /*********************************************************************//**
2518 Reads the biggest tablespace id from the high end of the insert buffer
2519 tree and updates the counter in fil_system. */
2520 UNIV_INTERN
2521 void
2522 ibuf_update_max_tablespace_id(void)
2523 /*===============================*/
2525 ulint max_space_id;
2526 const rec_t* rec;
2527 const byte* field;
2528 ulint len;
2529 btr_pcur_t pcur;
2530 mtr_t mtr;
2532 ut_a(!dict_table_is_comp(ibuf->index->table));
2534 ibuf_enter();
2536 mtr_start(&mtr);
2538 btr_pcur_open_at_index_side(
2539 FALSE, ibuf->index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
2541 btr_pcur_move_to_prev(&pcur, &mtr);
2543 if (btr_pcur_is_before_first_on_page(&pcur)) {
2544 /* The tree is empty */
2546 max_space_id = 0;
2547 } else {
2548 rec = btr_pcur_get_rec(&pcur);
2550 field = rec_get_nth_field_old(rec, 0, &len);
2552 ut_a(len == 4);
2554 max_space_id = mach_read_from_4(field);
2557 mtr_commit(&mtr);
2558 ibuf_exit();
2560 /* printf("Maximum space id in insert buffer %lu\n", max_space_id); */
2562 fil_set_max_space_id_if_bigger(max_space_id);
2565 /*********************************************************************//**
2566 Makes an index insert to the insert buffer, instead of directly to the disk
2567 page, if this is possible.
2568 @return DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
2569 static
2570 ulint
2571 ibuf_insert_low(
2572 /*============*/
2573 ulint mode, /*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
2574 const dtuple_t* entry, /*!< in: index entry to insert */
2575 ulint entry_size,
2576 /*!< in: rec_get_converted_size(index, entry) */
2577 dict_index_t* index, /*!< in: index where to insert; must not be
2578 unique or clustered */
2579 ulint space, /*!< in: space id where to insert */
2580 ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
2581 ulint page_no,/*!< in: page number where to insert */
2582 que_thr_t* thr) /*!< in: query thread */
2584 big_rec_t* dummy_big_rec;
2585 btr_pcur_t pcur;
2586 btr_cur_t* cursor;
2587 dtuple_t* ibuf_entry;
2588 mem_heap_t* heap;
2589 ulint buffered;
2590 rec_t* ins_rec;
2591 ibool old_bit_value;
2592 page_t* bitmap_page;
2593 page_t* root;
2594 ulint err;
2595 ibool do_merge;
2596 ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
2597 ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
2598 ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
2599 ulint n_stored;
2600 ulint bits;
2601 mtr_t mtr;
2602 mtr_t bitmap_mtr;
2604 ut_a(!dict_index_is_clust(index));
2605 ut_ad(dtuple_check_typed(entry));
2606 ut_ad(ut_is_2pow(zip_size));
2608 ut_a(trx_sys_multiple_tablespace_format);
2610 do_merge = FALSE;
2612 mutex_enter(&ibuf_mutex);
2614 if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
2615 /* Insert buffer is now too big, contract it but do not try
2616 to insert */
2618 mutex_exit(&ibuf_mutex);
2620 #ifdef UNIV_IBUF_DEBUG
2621 fputs("Ibuf too big\n", stderr);
2622 #endif
2623 /* Use synchronous contract (== TRUE) */
2624 ibuf_contract(TRUE);
2626 return(DB_STRONG_FAIL);
2629 mutex_exit(&ibuf_mutex);
2631 if (mode == BTR_MODIFY_TREE) {
2632 mutex_enter(&ibuf_pessimistic_insert_mutex);
2634 ibuf_enter();
2636 mutex_enter(&ibuf_mutex);
2638 while (!ibuf_data_enough_free_for_insert()) {
2640 mutex_exit(&ibuf_mutex);
2642 ibuf_exit();
2644 mutex_exit(&ibuf_pessimistic_insert_mutex);
2646 err = ibuf_add_free_page();
2648 if (err == DB_STRONG_FAIL) {
2650 return(err);
2653 mutex_enter(&ibuf_pessimistic_insert_mutex);
2655 ibuf_enter();
2657 mutex_enter(&ibuf_mutex);
2659 } else {
2660 ibuf_enter();
2663 heap = mem_heap_create(512);
2665 /* Build the entry which contains the space id and the page number as
2666 the first fields and the type information for other fields, and which
2667 will be inserted to the insert buffer. */
2669 ibuf_entry = ibuf_entry_build(index, entry, space, page_no, heap);
2671 /* Open a cursor to the insert buffer tree to calculate if we can add
2672 the new entry to it without exceeding the free space limit for the
2673 page. */
2675 mtr_start(&mtr);
2677 btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
2679 /* Find out the volume of already buffered inserts for the same index
2680 page */
2681 buffered = ibuf_get_volume_buffered(&pcur, space, page_no, &mtr);
2683 #ifdef UNIV_IBUF_COUNT_DEBUG
2684 ut_a((buffered == 0) || ibuf_count_get(space, page_no));
2685 #endif
2686 mtr_start(&bitmap_mtr);
2688 bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
2689 zip_size, &bitmap_mtr);
2691 /* We check if the index page is suitable for buffered entries */
2693 if (buf_page_peek(space, page_no)
2694 || lock_rec_expl_exist_on_page(space, page_no)) {
2695 err = DB_STRONG_FAIL;
2697 mtr_commit(&bitmap_mtr);
2699 goto function_exit;
2702 bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
2703 IBUF_BITMAP_FREE, &bitmap_mtr);
2705 if (buffered + entry_size + page_dir_calc_reserved_space(1)
2706 > ibuf_index_page_calc_free_from_bits(zip_size, bits)) {
2707 mtr_commit(&bitmap_mtr);
2709 /* It may not fit */
2710 err = DB_STRONG_FAIL;
2712 do_merge = TRUE;
2714 ibuf_get_merge_page_nos(FALSE, btr_pcur_get_rec(&pcur),
2715 space_ids, space_versions,
2716 page_nos, &n_stored);
2717 goto function_exit;
2720 /* Set the bitmap bit denoting that the insert buffer contains
2721 buffered entries for this index page, if the bit is not set yet */
2723 old_bit_value = ibuf_bitmap_page_get_bits(
2724 bitmap_page, page_no, zip_size,
2725 IBUF_BITMAP_BUFFERED, &bitmap_mtr);
2727 if (!old_bit_value) {
2728 ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
2729 IBUF_BITMAP_BUFFERED, TRUE,
2730 &bitmap_mtr);
2733 mtr_commit(&bitmap_mtr);
2735 cursor = btr_pcur_get_btr_cur(&pcur);
2737 if (mode == BTR_MODIFY_PREV) {
2738 err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
2739 ibuf_entry, &ins_rec,
2740 &dummy_big_rec, 0, thr, &mtr);
2741 if (err == DB_SUCCESS) {
2742 /* Update the page max trx id field */
2743 page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
2744 thr_get_trx(thr)->id, &mtr);
2746 } else {
2747 ut_ad(mode == BTR_MODIFY_TREE);
2749 /* We acquire an x-latch to the root page before the insert,
2750 because a pessimistic insert releases the tree x-latch,
2751 which would cause the x-latching of the root after that to
2752 break the latching order. */
2754 root = ibuf_tree_root_get(&mtr);
2756 err = btr_cur_optimistic_insert(
2757 BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG,
2758 cursor, ibuf_entry, &ins_rec,
2759 &dummy_big_rec, 0, thr, &mtr);
2761 if (err == DB_FAIL) {
2762 err = btr_cur_pessimistic_insert(
2763 BTR_NO_LOCKING_FLAG
2764 | BTR_NO_UNDO_LOG_FLAG,
2765 cursor, ibuf_entry, &ins_rec,
2766 &dummy_big_rec, 0, thr, &mtr);
2769 if (err == DB_SUCCESS) {
2770 /* Update the page max trx id field */
2771 page_update_max_trx_id(btr_cur_get_block(cursor), NULL,
2772 thr_get_trx(thr)->id, &mtr);
2775 ibuf_size_update(root, &mtr);
2778 function_exit:
2779 #ifdef UNIV_IBUF_COUNT_DEBUG
2780 if (err == DB_SUCCESS) {
2781 fprintf(stderr,
2782 "Incrementing ibuf count of space %lu page %lu\n"
2783 "from %lu by 1\n", space, page_no,
2784 ibuf_count_get(space, page_no));
2786 ibuf_count_set(space, page_no,
2787 ibuf_count_get(space, page_no) + 1);
2789 #endif
2790 if (mode == BTR_MODIFY_TREE) {
2792 mutex_exit(&ibuf_mutex);
2793 mutex_exit(&ibuf_pessimistic_insert_mutex);
2796 mtr_commit(&mtr);
2797 btr_pcur_close(&pcur);
2798 ibuf_exit();
2800 mem_heap_free(heap);
2802 if (err == DB_SUCCESS) {
2803 mutex_enter(&ibuf_mutex);
2805 ibuf->empty = FALSE;
2806 ibuf->n_inserts++;
2808 mutex_exit(&ibuf_mutex);
2810 if (mode == BTR_MODIFY_TREE) {
2811 ibuf_contract_after_insert(entry_size);
2815 if (do_merge) {
2816 #ifdef UNIV_IBUF_DEBUG
2817 ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
2818 #endif
2819 buf_read_ibuf_merge_pages(FALSE, space_ids, space_versions,
2820 page_nos, n_stored);
2823 return(err);
2826 /*********************************************************************//**
2827 Makes an index insert to the insert buffer, instead of directly to the disk
2828 page, if this is possible. Does not do insert if the index is clustered
2829 or unique.
2830 @return TRUE if success */
2831 UNIV_INTERN
2832 ibool
2833 ibuf_insert(
2834 /*========*/
2835 const dtuple_t* entry, /*!< in: index entry to insert */
2836 dict_index_t* index, /*!< in: index where to insert */
2837 ulint space, /*!< in: space id where to insert */
2838 ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
2839 ulint page_no,/*!< in: page number where to insert */
2840 que_thr_t* thr) /*!< in: query thread */
2842 ulint err;
2843 ulint entry_size;
2845 ut_a(trx_sys_multiple_tablespace_format);
2846 ut_ad(dtuple_check_typed(entry));
2847 ut_ad(ut_is_2pow(zip_size));
2849 ut_a(!dict_index_is_clust(index));
2851 switch (UNIV_EXPECT(ibuf_use, IBUF_USE_INSERT)) {
2852 case IBUF_USE_NONE:
2853 return(FALSE);
2854 case IBUF_USE_INSERT:
2855 goto do_insert;
2856 case IBUF_USE_COUNT:
2857 break;
2860 ut_error; /* unknown value of ibuf_use */
2862 do_insert:
2863 entry_size = rec_get_converted_size(index, entry, 0);
2865 if (entry_size
2866 >= (page_get_free_space_of_empty(dict_table_is_comp(index->table))
2867 / 2)) {
2868 return(FALSE);
2871 err = ibuf_insert_low(BTR_MODIFY_PREV, entry, entry_size,
2872 index, space, zip_size, page_no, thr);
2873 if (err == DB_FAIL) {
2874 err = ibuf_insert_low(BTR_MODIFY_TREE, entry, entry_size,
2875 index, space, zip_size, page_no, thr);
2878 if (err == DB_SUCCESS) {
2879 #ifdef UNIV_IBUF_DEBUG
2880 /* fprintf(stderr, "Ibuf insert for page no %lu of index %s\n",
2881 page_no, index->name); */
2882 #endif
2883 return(TRUE);
2885 } else {
2886 ut_a(err == DB_STRONG_FAIL);
2888 return(FALSE);
2892 /********************************************************************//**
2893 During merge, inserts to an index page a secondary index entry extracted
2894 from the insert buffer.
2895 @return newly inserted record */
2896 static
2897 rec_t*
2898 ibuf_insert_to_index_page_low(
2899 /*==========================*/
2900 /* out: newly inserted record */
2901 const dtuple_t* entry, /*!< in: buffered entry to insert */
2902 buf_block_t* block, /*!< in/out: index page where the buffered
2903 entry should be placed */
2904 dict_index_t* index, /*!< in: record descriptor */
2905 mtr_t* mtr, /*!< in/out: mtr */
2906 page_cur_t* page_cur)/*!< in/out: cursor positioned on the record
2907 after which to insert the buffered entry */
2909 const page_t* page;
2910 ulint space;
2911 ulint page_no;
2912 ulint zip_size;
2913 const page_t* bitmap_page;
2914 ulint old_bits;
2915 rec_t* rec;
2916 DBUG_ENTER("ibuf_insert_to_index_page_low");
2918 rec = page_cur_tuple_insert(page_cur, entry, index, 0, mtr);
2919 if (rec != NULL) {
2920 DBUG_RETURN(rec);
2923 /* If the record did not fit, reorganize */
2925 btr_page_reorganize(block, index, mtr);
2926 page_cur_search(block, index, entry, PAGE_CUR_LE, page_cur);
2928 /* This time the record must fit */
2930 rec = page_cur_tuple_insert(page_cur, entry, index, 0, mtr);
2931 if (rec != NULL) {
2932 DBUG_RETURN(rec);
2935 page = buf_block_get_frame(block);
2937 ut_print_timestamp(stderr);
2939 fprintf(stderr,
2940 " InnoDB: Error: Insert buffer insert fails;"
2941 " page free %lu, dtuple size %lu\n",
2942 (ulong) page_get_max_insert_size(page, 1),
2943 (ulong) rec_get_converted_size(index, entry, 0));
2944 fputs("InnoDB: Cannot insert index record ", stderr);
2945 dtuple_print(stderr, entry);
2946 fputs("\nInnoDB: The table where this index record belongs\n"
2947 "InnoDB: is now probably corrupt. Please run CHECK TABLE on\n"
2948 "InnoDB: that table.\n", stderr);
2950 space = page_get_space_id(page);
2951 zip_size = buf_block_get_zip_size(block);
2952 page_no = page_get_page_no(page);
2954 bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
2955 old_bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
2956 IBUF_BITMAP_FREE, mtr);
2958 fprintf(stderr,
2959 "InnoDB: space %lu, page %lu, zip_size %lu, bitmap bits %lu\n",
2960 (ulong) space, (ulong) page_no,
2961 (ulong) zip_size, (ulong) old_bits);
2963 fputs("InnoDB: Submit a detailed bug report"
2964 " to http://bugs.mysql.com\n", stderr);
2965 DBUG_RETURN(NULL);
2968 /************************************************************************
2969 During merge, inserts to an index page a secondary index entry extracted
2970 from the insert buffer. */
2971 static
2972 void
2973 ibuf_insert_to_index_page(
2974 /*======================*/
2975 const dtuple_t* entry, /*!< in: buffered entry to insert */
2976 buf_block_t* block, /*!< in/out: index page where the buffered entry
2977 should be placed */
2978 dict_index_t* index, /*!< in: record descriptor */
2979 mtr_t* mtr) /*!< in: mtr */
2981 page_cur_t page_cur;
2982 ulint low_match;
2983 page_t* page = buf_block_get_frame(block);
2984 rec_t* rec;
2985 DBUG_ENTER("ibuf_insert_to_index_page");
2987 ut_ad(ibuf_inside());
2988 ut_ad(dtuple_check_typed(entry));
2989 ut_ad(!buf_block_align(page)->index);
2991 if (UNIV_UNLIKELY(dict_table_is_comp(index->table)
2992 != (ibool)!!page_is_comp(page))) {
2993 fputs("InnoDB: Trying to insert a record from"
2994 " the insert buffer to an index page\n"
2995 "InnoDB: but the 'compact' flag does not match!\n",
2996 stderr);
2997 goto dump;
3000 rec = page_rec_get_next(page_get_infimum_rec(page));
3002 if (UNIV_UNLIKELY(rec_get_n_fields(rec, index)
3003 != dtuple_get_n_fields(entry))) {
3004 fputs("InnoDB: Trying to insert a record from"
3005 " the insert buffer to an index page\n"
3006 "InnoDB: but the number of fields does not match!\n",
3007 stderr);
3008 dump:
3009 buf_page_print(page, 0);
3011 dtuple_print(stderr, entry);
3013 fputs("InnoDB: The table where where"
3014 " this index record belongs\n"
3015 "InnoDB: is now probably corrupt."
3016 " Please run CHECK TABLE on\n"
3017 "InnoDB: your tables.\n"
3018 "InnoDB: Submit a detailed bug report to"
3019 " http://bugs.mysql.com!\n", stderr);
3021 DBUG_VOID_RETURN;
3024 low_match = page_cur_search(block, index, entry,
3025 PAGE_CUR_LE, &page_cur);
3027 if (UNIV_UNLIKELY(low_match == dtuple_get_n_fields(entry))) {
3028 mem_heap_t* heap;
3029 upd_t* update;
3030 ulint* offsets;
3031 page_zip_des_t* page_zip;
3033 rec = page_cur_get_rec(&page_cur);
3035 /* This is based on
3036 row_ins_sec_index_entry_by_modify(BTR_MODIFY_LEAF). */
3037 ut_ad(rec_get_deleted_flag(rec, page_is_comp(page)));
3039 heap = mem_heap_create(1024);
3041 offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED,
3042 &heap);
3043 update = row_upd_build_sec_rec_difference_binary(
3044 index, entry, rec, NULL, heap);
3046 page_zip = buf_block_get_page_zip(block);
3048 if (update->n_fields == 0) {
3049 /* The records only differ in the delete-mark.
3050 Clear the delete-mark, like we did before
3051 Bug #56680 was fixed. */
3052 btr_cur_set_deleted_flag_for_ibuf(
3053 rec, page_zip, FALSE, mtr);
3054 updated_in_place:
3055 mem_heap_free(heap);
3056 DBUG_VOID_RETURN;
3059 /* Copy the info bits. Clear the delete-mark. */
3060 update->info_bits = rec_get_info_bits(rec, page_is_comp(page));
3061 update->info_bits &= ~REC_INFO_DELETED_FLAG;
3063 /* We cannot invoke btr_cur_optimistic_update() here,
3064 because we do not have a btr_cur_t or que_thr_t,
3065 as the insert buffer merge occurs at a very low level. */
3066 if (!row_upd_changes_field_size_or_external(index, offsets,
3067 update)
3068 && (!page_zip || btr_cur_update_alloc_zip(
3069 page_zip, block, index,
3070 rec_offs_size(offsets), FALSE, mtr))) {
3071 /* This is the easy case. Do something similar
3072 to btr_cur_update_in_place(). */
3073 row_upd_rec_in_place(rec, index, offsets,
3074 update, page_zip);
3076 /* Log the update in place operation. During recovery
3077 MLOG_COMP_REC_UPDATE_IN_PLACE/MLOG_REC_UPDATE_IN_PLACE
3078 expects trx_id, roll_ptr for secondary indexes. So we
3079 just write dummy trx_id(0), roll_ptr(0) */
3080 btr_cur_update_in_place_log(BTR_KEEP_SYS_FLAG, rec,
3081 index, update,
3082 NULL,
3083 ut_dulint_zero, mtr);
3084 DBUG_EXECUTE_IF(
3085 "crash_after_log_ibuf_upd_inplace",
3086 log_buffer_flush_to_disk();
3087 fprintf(stderr,
3088 "InnoDB: Wrote log record for ibuf "
3089 "update in place operation\n");
3090 DBUG_SUICIDE();
3093 goto updated_in_place;
3096 /* A collation may identify values that differ in
3097 storage length.
3098 Some examples (1 or 2 bytes):
3099 utf8_turkish_ci: I = U+0131 LATIN SMALL LETTER DOTLESS I
3100 utf8_general_ci: S = U+00DF LATIN SMALL LETTER SHARP S
3101 utf8_general_ci: A = U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
3103 latin1_german2_ci: SS = U+00DF LATIN SMALL LETTER SHARP S
3105 Examples of a character (3-byte UTF-8 sequence)
3106 identified with 2 or 4 characters (1-byte UTF-8 sequences):
3108 utf8_unicode_ci: 'II' = U+2171 SMALL ROMAN NUMERAL TWO
3109 utf8_unicode_ci: '(10)' = U+247D PARENTHESIZED NUMBER TEN
3112 /* Delete the different-length record, and insert the
3113 buffered one. */
3115 lock_rec_store_on_page_infimum(block, rec);
3116 page_cur_delete_rec(&page_cur, index, offsets, mtr);
3117 page_cur_move_to_prev(&page_cur);
3119 rec = ibuf_insert_to_index_page_low(entry, block, index, mtr,
3120 &page_cur);
3121 ut_ad(!cmp_dtuple_rec(entry, rec,
3122 rec_get_offsets(rec, index, NULL,
3123 ULINT_UNDEFINED,
3124 &heap)));
3125 mem_heap_free(heap);
3127 lock_rec_restore_from_page_infimum(block, rec, block);
3128 } else {
3129 ibuf_insert_to_index_page_low(entry, block, index, mtr,
3130 &page_cur);
3132 DBUG_VOID_RETURN;
3135 /*********************************************************************//**
3136 Deletes from ibuf the record on which pcur is positioned. If we have to
3137 resort to a pessimistic delete, this function commits mtr and closes
3138 the cursor.
3139 @return TRUE if mtr was committed and pcur closed in this operation */
3140 static
3141 ibool
3142 ibuf_delete_rec(
3143 /*============*/
3144 ulint space, /*!< in: space id */
3145 ulint page_no,/*!< in: index page number where the record
3146 should belong */
3147 btr_pcur_t* pcur, /*!< in: pcur positioned on the record to
3148 delete, having latch mode BTR_MODIFY_LEAF */
3149 const dtuple_t* search_tuple,
3150 /*!< in: search tuple for entries of page_no */
3151 mtr_t* mtr) /*!< in: mtr */
3153 ibool success;
3154 page_t* root;
3155 ulint err;
3157 ut_ad(ibuf_inside());
3158 ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
3159 ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
3160 ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
3162 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
3163 if (ibuf_debug == 2) {
3164 /* Inject a fault (crash). We do this before trying
3165 optimistic delete, because a pessimistic delete in the
3166 change buffer would require a larger test case. */
3168 /* Flag the buffered record as processed, to avoid
3169 an assertion failure after crash recovery. */
3170 btr_cur_set_deleted_flag_for_ibuf(
3171 btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
3172 mtr_commit(mtr);
3173 log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
3174 DBUG_SUICIDE();
3176 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
3178 success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
3180 if (success) {
3181 #ifdef UNIV_IBUF_COUNT_DEBUG
3182 fprintf(stderr,
3183 "Decrementing ibuf count of space %lu page %lu\n"
3184 "from %lu by 1\n", space, page_no,
3185 ibuf_count_get(space, page_no));
3186 ibuf_count_set(space, page_no,
3187 ibuf_count_get(space, page_no) - 1);
3188 #endif
3189 return(FALSE);
3192 ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
3193 ut_ad(ibuf_rec_get_page_no(btr_pcur_get_rec(pcur)) == page_no);
3194 ut_ad(ibuf_rec_get_space(btr_pcur_get_rec(pcur)) == space);
3196 /* We have to resort to a pessimistic delete from ibuf.
3197 Delete-mark the record so that it will not be applied again,
3198 in case the server crashes before the pessimistic delete is
3199 made persistent. */
3200 btr_cur_set_deleted_flag_for_ibuf(
3201 btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
3203 btr_pcur_store_position(pcur, mtr);
3205 btr_pcur_commit_specify_mtr(pcur, mtr);
3207 mutex_enter(&ibuf_mutex);
3209 mtr_start(mtr);
3211 success = btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr);
3213 if (!success) {
3214 if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
3215 /* The tablespace has been dropped. It is possible
3216 that another thread has deleted the insert buffer
3217 entry. Do not complain. */
3218 goto commit_and_exit;
3221 fprintf(stderr,
3222 "InnoDB: ERROR: Submit the output to"
3223 " http://bugs.mysql.com\n"
3224 "InnoDB: ibuf cursor restoration fails!\n"
3225 "InnoDB: ibuf record inserted to page %lu\n",
3226 (ulong) page_no);
3227 fflush(stderr);
3229 rec_print_old(stderr, btr_pcur_get_rec(pcur));
3230 rec_print_old(stderr, pcur->old_rec);
3231 dtuple_print(stderr, search_tuple);
3233 rec_print_old(stderr,
3234 page_rec_get_next(btr_pcur_get_rec(pcur)));
3235 fflush(stderr);
3237 btr_pcur_commit_specify_mtr(pcur, mtr);
3239 fputs("InnoDB: Validating insert buffer tree:\n", stderr);
3240 if (!btr_validate_index(ibuf->index, NULL)) {
3241 ut_error;
3244 fprintf(stderr, "InnoDB: ibuf tree ok\n");
3245 fflush(stderr);
3247 goto func_exit;
3250 root = ibuf_tree_root_get(mtr);
3252 btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
3253 RB_NONE, mtr);
3254 ut_a(err == DB_SUCCESS);
3256 #ifdef UNIV_IBUF_COUNT_DEBUG
3257 ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
3258 #endif
3259 ibuf_size_update(root, mtr);
3261 commit_and_exit:
3262 btr_pcur_commit_specify_mtr(pcur, mtr);
3264 func_exit:
3265 btr_pcur_close(pcur);
3267 mutex_exit(&ibuf_mutex);
3269 return(TRUE);
3272 /*********************************************************************//**
3273 When an index page is read from a disk to the buffer pool, this function
3274 inserts to the page the possible index entries buffered in the insert buffer.
3275 The entries are deleted from the insert buffer. If the page is not read, but
3276 created in the buffer pool, this function deletes its buffered entries from
3277 the insert buffer; there can exist entries for such a page if the page
3278 belonged to an index which subsequently was dropped. */
3279 UNIV_INTERN
3280 void
3281 ibuf_merge_or_delete_for_page(
3282 /*==========================*/
3283 buf_block_t* block, /*!< in: if page has been read from
3284 disk, pointer to the page x-latched,
3285 else NULL */
3286 ulint space, /*!< in: space id of the index page */
3287 ulint page_no,/*!< in: page number of the index page */
3288 ulint zip_size,/*!< in: compressed page size in bytes,
3289 or 0 */
3290 ibool update_ibuf_bitmap)/*!< in: normally this is set
3291 to TRUE, but if we have deleted or are
3292 deleting the tablespace, then we
3293 naturally do not want to update a
3294 non-existent bitmap page */
3296 mem_heap_t* heap;
3297 btr_pcur_t pcur;
3298 dtuple_t* search_tuple;
3299 ulint n_inserts;
3300 #ifdef UNIV_IBUF_DEBUG
3301 ulint volume;
3302 #endif
3303 page_zip_des_t* page_zip = NULL;
3304 ibool tablespace_being_deleted = FALSE;
3305 ibool corruption_noticed = FALSE;
3306 mtr_t mtr;
3308 ut_ad(!block || buf_block_get_space(block) == space);
3309 ut_ad(!block || buf_block_get_page_no(block) == page_no);
3310 ut_ad(!block || buf_block_get_zip_size(block) == zip_size);
3311 ut_ad(!block || buf_block_get_io_fix(block) == BUF_IO_READ);
3313 if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE
3314 || trx_sys_hdr_page(space, page_no)) {
3315 return;
3318 /* We cannot refer to zip_size in the following, because
3319 zip_size is passed as ULINT_UNDEFINED (it is unknown) when
3320 buf_read_ibuf_merge_pages() is merging (discarding) changes
3321 for a dropped tablespace. When block != NULL or
3322 update_ibuf_bitmap is specified, the zip_size must be known.
3323 That is why we will repeat the check below, with zip_size in
3324 place of 0. Passing zip_size as 0 assumes that the
3325 uncompressed page size always is a power-of-2 multiple of the
3326 compressed page size. */
3328 if (ibuf_fixed_addr_page(space, 0, page_no)
3329 || fsp_descr_page(0, page_no)) {
3330 return;
3333 if (UNIV_LIKELY(update_ibuf_bitmap)) {
3334 ut_a(ut_is_2pow(zip_size));
3336 if (ibuf_fixed_addr_page(space, zip_size, page_no)
3337 || fsp_descr_page(zip_size, page_no)) {
3338 return;
3341 /* If the following returns FALSE, we get the counter
3342 incremented, and must decrement it when we leave this
3343 function. When the counter is > 0, that prevents tablespace
3344 from being dropped. */
3346 tablespace_being_deleted = fil_inc_pending_ops(space);
3348 if (UNIV_UNLIKELY(tablespace_being_deleted)) {
3349 /* Do not try to read the bitmap page from space;
3350 just delete the ibuf records for the page */
3352 block = NULL;
3353 update_ibuf_bitmap = FALSE;
3354 } else {
3355 page_t* bitmap_page;
3357 mtr_start(&mtr);
3359 bitmap_page = ibuf_bitmap_get_map_page(
3360 space, page_no, zip_size, &mtr);
3362 if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no,
3363 zip_size,
3364 IBUF_BITMAP_BUFFERED,
3365 &mtr)) {
3366 /* No inserts buffered for this page */
3367 mtr_commit(&mtr);
3369 if (!tablespace_being_deleted) {
3370 fil_decr_pending_ops(space);
3373 return;
3375 mtr_commit(&mtr);
3377 } else if (block
3378 && (ibuf_fixed_addr_page(space, zip_size, page_no)
3379 || fsp_descr_page(zip_size, page_no))) {
3381 return;
3384 ibuf_enter();
3386 heap = mem_heap_create(512);
3388 if (!trx_sys_multiple_tablespace_format) {
3389 ut_a(trx_doublewrite_must_reset_space_ids);
3390 search_tuple = ibuf_search_tuple_build(space, page_no, heap);
3391 } else {
3392 search_tuple = ibuf_new_search_tuple_build(space, page_no,
3393 heap);
3396 if (block) {
3397 /* Move the ownership of the x-latch on the page to this OS
3398 thread, so that we can acquire a second x-latch on it. This
3399 is needed for the insert operations to the index page to pass
3400 the debug checks. */
3402 rw_lock_x_lock_move_ownership(&(block->lock));
3403 page_zip = buf_block_get_page_zip(block);
3405 if (UNIV_UNLIKELY(fil_page_get_type(block->frame)
3406 != FIL_PAGE_INDEX)
3407 || UNIV_UNLIKELY(!page_is_leaf(block->frame))) {
3409 page_t* bitmap_page;
3411 corruption_noticed = TRUE;
3413 ut_print_timestamp(stderr);
3415 mtr_start(&mtr);
3417 fputs(" InnoDB: Dump of the ibuf bitmap page:\n",
3418 stderr);
3420 bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
3421 zip_size, &mtr);
3422 buf_page_print(bitmap_page, 0);
3424 mtr_commit(&mtr);
3426 fputs("\nInnoDB: Dump of the page:\n", stderr);
3428 buf_page_print(block->frame, 0);
3430 fprintf(stderr,
3431 "InnoDB: Error: corruption in the tablespace."
3432 " Bitmap shows insert\n"
3433 "InnoDB: buffer records to page n:o %lu"
3434 " though the page\n"
3435 "InnoDB: type is %lu, which is"
3436 " not an index leaf page!\n"
3437 "InnoDB: We try to resolve the problem"
3438 " by skipping the insert buffer\n"
3439 "InnoDB: merge for this page."
3440 " Please run CHECK TABLE on your tables\n"
3441 "InnoDB: to determine if they are corrupt"
3442 " after this.\n\n"
3443 "InnoDB: Please submit a detailed bug report"
3444 " to http://bugs.mysql.com\n\n",
3445 (ulong) page_no,
3446 (ulong)
3447 fil_page_get_type(block->frame));
3451 n_inserts = 0;
3452 #ifdef UNIV_IBUF_DEBUG
3453 volume = 0;
3454 #endif
3455 loop:
3456 mtr_start(&mtr);
3458 if (block) {
3459 ibool success;
3461 success = buf_page_get_known_nowait(
3462 RW_X_LATCH, block,
3463 BUF_KEEP_OLD, __FILE__, __LINE__, &mtr);
3465 ut_a(success);
3467 /* This is a user page (secondary index leaf page),
3468 but we pretend that it is a change buffer page in
3469 order to obey the latching order. This should be OK,
3470 because buffered changes are applied immediately while
3471 the block is io-fixed. Other threads must not try to
3472 latch an io-fixed block. */
3473 buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
3476 /* Position pcur in the insert buffer at the first entry for this
3477 index page */
3478 btr_pcur_open_on_user_rec(
3479 ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
3480 &pcur, &mtr);
3482 if (!btr_pcur_is_on_user_rec(&pcur)) {
3483 ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
3485 goto reset_bit;
3488 for (;;) {
3489 rec_t* rec;
3491 ut_ad(btr_pcur_is_on_user_rec(&pcur));
3493 rec = btr_pcur_get_rec(&pcur);
3495 /* Check if the entry is for this index page */
3496 if (ibuf_rec_get_page_no(rec) != page_no
3497 || ibuf_rec_get_space(rec) != space) {
3499 if (block) {
3500 page_header_reset_last_insert(
3501 block->frame, page_zip, &mtr);
3504 goto reset_bit;
3507 if (UNIV_UNLIKELY(corruption_noticed)) {
3508 fputs("InnoDB: Discarding record\n ", stderr);
3509 rec_print_old(stderr, rec);
3510 fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
3511 } else if (block && !rec_get_deleted_flag(rec, 0)) {
3512 /* Now we have at pcur a record which should be
3513 inserted to the index page; NOTE that the call below
3514 copies pointers to fields in rec, and we must
3515 keep the latch to the rec page until the
3516 insertion is finished! */
3517 dtuple_t* entry;
3518 trx_id_t max_trx_id;
3519 dict_index_t* dummy_index;
3521 max_trx_id = page_get_max_trx_id(page_align(rec));
3522 page_update_max_trx_id(block, page_zip, max_trx_id,
3523 &mtr);
3525 entry = ibuf_build_entry_from_ibuf_rec(
3526 rec, heap, &dummy_index);
3527 #ifdef UNIV_IBUF_DEBUG
3528 volume += rec_get_converted_size(dummy_index, entry, 0)
3529 + page_dir_calc_reserved_space(1);
3530 ut_a(volume <= 4 * UNIV_PAGE_SIZE
3531 / IBUF_PAGE_SIZE_PER_FREE_SPACE);
3532 #endif
3533 ibuf_insert_to_index_page(entry, block,
3534 dummy_index, &mtr);
3535 ibuf_dummy_index_free(dummy_index);
3538 n_inserts++;
3540 /* Delete the record from ibuf */
3541 if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,
3542 &mtr)) {
3543 /* Deletion was pessimistic and mtr was committed:
3544 we start from the beginning again */
3546 goto loop;
3547 } else if (btr_pcur_is_after_last_on_page(&pcur)) {
3548 mtr_commit(&mtr);
3549 btr_pcur_close(&pcur);
3551 goto loop;
3555 reset_bit:
3556 #ifdef UNIV_IBUF_COUNT_DEBUG
3557 if (ibuf_count_get(space, page_no) > 0) {
3558 /* btr_print_tree(ibuf_data->index->tree, 100);
3559 ibuf_print(); */
3561 #endif
3562 if (UNIV_LIKELY(update_ibuf_bitmap)) {
3563 page_t* bitmap_page;
3565 bitmap_page = ibuf_bitmap_get_map_page(
3566 space, page_no, zip_size, &mtr);
3568 ibuf_bitmap_page_set_bits(
3569 bitmap_page, page_no, zip_size,
3570 IBUF_BITMAP_BUFFERED, FALSE, &mtr);
3572 if (block) {
3573 ulint old_bits = ibuf_bitmap_page_get_bits(
3574 bitmap_page, page_no, zip_size,
3575 IBUF_BITMAP_FREE, &mtr);
3577 ulint new_bits = ibuf_index_page_calc_free(
3578 zip_size, block);
3580 if (old_bits != new_bits) {
3581 ibuf_bitmap_page_set_bits(
3582 bitmap_page, page_no, zip_size,
3583 IBUF_BITMAP_FREE, new_bits, &mtr);
3588 mtr_commit(&mtr);
3589 btr_pcur_close(&pcur);
3590 mem_heap_free(heap);
3592 /* Protect our statistics keeping from race conditions */
3593 mutex_enter(&ibuf_mutex);
3595 ibuf->n_merges++;
3596 ibuf->n_merged_recs += n_inserts;
3598 mutex_exit(&ibuf_mutex);
3600 if (update_ibuf_bitmap && !tablespace_being_deleted) {
3602 fil_decr_pending_ops(space);
3605 ibuf_exit();
3607 #ifdef UNIV_IBUF_COUNT_DEBUG
3608 ut_a(ibuf_count_get(space, page_no) == 0);
3609 #endif
3612 /*********************************************************************//**
3613 Deletes all entries in the insert buffer for a given space id. This is used
3614 in DISCARD TABLESPACE and IMPORT TABLESPACE.
3615 NOTE: this does not update the page free bitmaps in the space. The space will
3616 become CORRUPT when you call this function! */
3617 UNIV_INTERN
3618 void
3619 ibuf_delete_for_discarded_space(
3620 /*============================*/
3621 ulint space) /*!< in: space id */
3623 mem_heap_t* heap;
3624 btr_pcur_t pcur;
3625 dtuple_t* search_tuple;
3626 rec_t* ibuf_rec;
3627 ulint page_no;
3628 ibool closed;
3629 ulint n_inserts;
3630 mtr_t mtr;
3632 heap = mem_heap_create(512);
3634 /* Use page number 0 to build the search tuple so that we get the
3635 cursor positioned at the first entry for this space id */
3637 search_tuple = ibuf_new_search_tuple_build(space, 0, heap);
3639 n_inserts = 0;
3640 loop:
3641 ibuf_enter();
3643 mtr_start(&mtr);
3645 /* Position pcur in the insert buffer at the first entry for the
3646 space */
3647 btr_pcur_open_on_user_rec(
3648 ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
3649 &pcur, &mtr);
3651 if (!btr_pcur_is_on_user_rec(&pcur)) {
3652 ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
3654 goto leave_loop;
3657 for (;;) {
3658 ut_ad(btr_pcur_is_on_user_rec(&pcur));
3660 ibuf_rec = btr_pcur_get_rec(&pcur);
3662 /* Check if the entry is for this space */
3663 if (ibuf_rec_get_space(ibuf_rec) != space) {
3665 goto leave_loop;
3668 page_no = ibuf_rec_get_page_no(ibuf_rec);
3670 n_inserts++;
3672 /* Delete the record from ibuf */
3673 closed = ibuf_delete_rec(space, page_no, &pcur, search_tuple,
3674 &mtr);
3675 if (closed) {
3676 /* Deletion was pessimistic and mtr was committed:
3677 we start from the beginning again */
3679 ibuf_exit();
3681 goto loop;
3684 if (btr_pcur_is_after_last_on_page(&pcur)) {
3685 mtr_commit(&mtr);
3686 btr_pcur_close(&pcur);
3688 ibuf_exit();
3690 goto loop;
3694 leave_loop:
3695 mtr_commit(&mtr);
3696 btr_pcur_close(&pcur);
3698 /* Protect our statistics keeping from race conditions */
3699 mutex_enter(&ibuf_mutex);
3701 ibuf->n_merges++;
3702 ibuf->n_merged_recs += n_inserts;
3704 mutex_exit(&ibuf_mutex);
3706 ibuf_exit();
3708 mem_heap_free(heap);
3711 /******************************************************************//**
3712 Looks if the insert buffer is empty.
3713 @return TRUE if empty */
3714 UNIV_INTERN
3715 ibool
3716 ibuf_is_empty(void)
3717 /*===============*/
3719 ibool is_empty;
3720 const page_t* root;
3721 mtr_t mtr;
3723 ibuf_enter();
3725 mutex_enter(&ibuf_mutex);
3727 mtr_start(&mtr);
3729 root = ibuf_tree_root_get(&mtr);
3731 if (page_get_n_recs(root) == 0) {
3733 is_empty = TRUE;
3735 if (ibuf->empty == FALSE) {
3736 fprintf(stderr,
3737 "InnoDB: Warning: insert buffer tree is empty"
3738 " but the data struct does not\n"
3739 "InnoDB: know it. This condition is legal"
3740 " if the master thread has not yet\n"
3741 "InnoDB: run to completion.\n");
3743 } else {
3744 ut_a(ibuf->empty == FALSE);
3746 is_empty = FALSE;
3749 mtr_commit(&mtr);
3751 mutex_exit(&ibuf_mutex);
3753 ibuf_exit();
3755 return(is_empty);
3758 /******************************************************************//**
3759 Prints info of ibuf. */
3760 UNIV_INTERN
3761 void
3762 ibuf_print(
3763 /*=======*/
3764 FILE* file) /*!< in: file where to print */
3766 #ifdef UNIV_IBUF_COUNT_DEBUG
3767 ulint i;
3768 ulint j;
3769 #endif
3771 mutex_enter(&ibuf_mutex);
3773 fprintf(file,
3774 "Ibuf: size %lu, free list len %lu, seg size %lu,\n"
3775 "%lu inserts, %lu merged recs, %lu merges\n",
3776 (ulong) ibuf->size,
3777 (ulong) ibuf->free_list_len,
3778 (ulong) ibuf->seg_size,
3779 (ulong) ibuf->n_inserts,
3780 (ulong) ibuf->n_merged_recs,
3781 (ulong) ibuf->n_merges);
3782 #ifdef UNIV_IBUF_COUNT_DEBUG
3783 for (i = 0; i < IBUF_COUNT_N_SPACES; i++) {
3784 for (j = 0; j < IBUF_COUNT_N_PAGES; j++) {
3785 ulint count = ibuf_count_get(i, j);
3787 if (count > 0) {
3788 fprintf(stderr,
3789 "Ibuf count for space/page %lu/%lu"
3790 " is %lu\n",
3791 (ulong) i, (ulong) j, (ulong) count);
3795 #endif /* UNIV_IBUF_COUNT_DEBUG */
3797 mutex_exit(&ibuf_mutex);
3799 #endif /* !UNIV_HOTBACKUP */