1 /* Innobase relational database engine; Copyright (C) 2001 Innobase Oy
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License 2
5 as published by the Free Software Foundation in June 1991.
7 This program is distributed in the hope that it will be useful,
8 but WITHOUT ANY WARRANTY; without even the implied warranty of
9 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 GNU General Public License for more details.
12 You should have received a copy of the GNU General Public License 2
13 along with this program (in file COPYING); if not, write to the Free
14 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
15 /******************************************************
16 The database buffer buf_pool
20 Created 11/5/1995 Heikki Tuuri
21 *******************************************************/
32 #include "lock0lock.h"
34 #include "ibuf0ibuf.h"
35 #include "dict0dict.h"
42 IMPLEMENTATION OF THE BUFFER POOL
43 =================================
45 Performance improvement:
46 ------------------------
47 Thread scheduling in NT may be so slow that the OS wait mechanism should
48 not be used even in waiting for disk reads to complete.
49 Rather, we should put waiting query threads to the queue of
50 waiting jobs, and let the OS thread do something useful while the i/o
51 is processed. In this way we could remove most OS thread switches in
52 an i/o-intensive benchmark like TPC-C.
54 A possibility is to put a user space thread library between the database
55 and NT. User space thread libraries might be very fast.
57 SQL Server 7.0 can be configured to use 'fibers' which are lightweight
58 threads in NT. These should be studied.
60 Buffer frames and blocks
61 ------------------------
62 Following the terminology of Gray and Reuter, we call the memory
63 blocks where file pages are loaded buffer frames. For each buffer
64 frame there is a control block, or shortly, a block, in the buffer
65 control array. The control info which does not need to be stored
66 in the file along with the file page, resides in the control block.
70 The buffer buf_pool contains a single mutex which protects all the
71 control data structures of the buf_pool. The content of a buffer frame is
72 protected by a separate read-write lock in its control block, though.
73 These locks can be locked and unlocked without owning the buf_pool mutex.
74 The OS events in the buf_pool struct can be waited for without owning the
77 The buf_pool mutex is a hot-spot in main memory, causing a lot of
78 memory bus traffic on multiprocessor systems when processors
79 alternately access the mutex. On our Pentium, the mutex is accessed
80 maybe every 10 microseconds. We gave up the solution to have mutexes
81 for each control block, for instance, because it seemed to be
84 A solution to reduce mutex contention of the buf_pool mutex is to
85 create a separate mutex for the page hash table. On Pentium,
86 accessing the hash table takes 2 microseconds, about half
87 of the total buf_pool mutex hold time.
92 The control block contains, for instance, the bufferfix count
93 which is incremented when a thread wants a file page to be fixed
94 in a buffer frame. The bufferfix operation does not lock the
95 contents of the frame, however. For this purpose, the control
96 block contains a read-write lock.
98 The buffer frames have to be aligned so that the start memory
99 address of a frame is divisible by the universal page size, which
102 We intend to make the buffer buf_pool size on-line reconfigurable,
103 that is, the buf_pool size can be changed without closing the database.
104 Then the database administrator may adjust it to be bigger
105 at night, for example. The control block array must
106 contain enough control blocks for the maximum buffer buf_pool size
107 which is used in the particular database.
108 If the buf_pool size is cut, we exploit the virtual memory mechanism of
109 the OS, and just refrain from using frames at high addresses. Then the OS
110 can swap them to disk.
112 The control blocks containing file pages are put to a hash table
113 according to the file address of the page.
114 We could speed up the access to an individual page by using
115 "pointer swizzling": we could replace the page references on
116 non-leaf index pages by direct pointers to the page, if it exists
117 in the buf_pool. We could make a separate hash table where we could
118 chain all the page references in non-leaf pages residing in the buf_pool,
119 using the page reference as the hash key,
120 and at the time of reading of a page update the pointers accordingly.
121 Drawbacks of this solution are added complexity and,
122 possibly, extra space required on non-leaf pages for memory pointers.
123 A simpler solution is just to speed up the hash table mechanism
124 in the database, using tables whose size is a power of 2.
129 There are several lists of control blocks. The free list contains
130 blocks which are currently not used.
132 The LRU-list contains all the blocks holding a file page
133 except those for which the bufferfix count is non-zero.
134 The pages are in the LRU list roughly in the order of the last
135 access to the page, so that the oldest pages are at the end of the
136 list. We also keep a pointer to near the end of the LRU list,
137 which we can use when we want to artificially age a page in the
138 buf_pool. This is used if we know that some page is not needed
139 again for some time: we insert the block right after the pointer,
140 causing it to be replaced sooner than would normally be the case.
141 Currently this aging mechanism is used for the read-ahead mechanism
142 of pages, and it can also be used when there is a scan of a full
143 table which cannot fit in the memory. Putting the pages near the
144 end of the LRU list, we make sure that most of the buf_pool stays in the
145 main memory, undisturbed.
147 The chain of modified blocks contains the blocks
148 holding file pages that have been modified in the memory
149 but not written to disk yet. The block with the oldest modification
150 which has not yet been written to disk is at the end of the chain.
155 First, a victim block for replacement has to be found in the
156 buf_pool. It is taken from the free list or searched for from the
157 end of the LRU-list. An exclusive lock is reserved for the frame,
158 the io_fix field is set in the block fixing the block in buf_pool,
159 and the io-operation for loading the page is queued. The io-handler thread
160 releases the X-lock on the frame and resets the io_fix field
161 when the io operation completes.
163 A thread may request the above operation using the function
164 buf_page_get(). It may then continue to request a lock on the frame.
165 The lock is granted when the io-handler releases the x-lock.
170 The read-ahead mechanism is intended to be intelligent and
171 isolated from the semantically higher levels of the database
172 index management. From the higher level we only need the
173 information if a file page has a natural successor or
174 predecessor page. On the leaf level of a B-tree index,
175 these are the next and previous pages in the natural
178 Let us first explain the read-ahead mechanism when the leaves
179 of a B-tree are scanned in an ascending or descending order.
180 When a read page is referenced for the first time in the buf_pool,
181 the buffer manager checks if it is at the border of a so-called
182 linear read-ahead area. The tablespace is divided into these
183 areas of size 64 blocks, for example. So if the page is at the
184 border of such an area, the read-ahead mechanism checks if
185 all the other blocks in the area have been accessed in an
186 ascending or descending order. If this is the case, the system
187 looks at the natural successor or predecessor of the page,
188 checks if that is at the border of another area, and in this case
189 issues read-requests for all the pages in that area. Maybe
190 we could relax the condition that all the pages in the area
191 have to be accessed: if data is deleted from a table, there may
192 appear holes of unused pages in the area.
194 A different read-ahead mechanism is used when there appears
195 to be a random access pattern to a file.
196 If a new page is referenced in the buf_pool, and several pages
197 of its random access area (for instance, 32 consecutive pages
198 in a tablespace) have recently been referenced, we may predict
199 that the whole area may be needed in the near future, and issue
200 the read requests for the whole area.
205 By a 'block' we mean the buffer header of type buf_block_t. By a 'page'
206 we mean the physical 16 kB memory area allocated from RAM for that block.
207 By a 'frame' we mean a 16 kB area in the virtual address space of the
208 process, in the frame_mem of buf_pool.
210 We can map pages to the frames of the buffer pool.
212 1) A buffer block allocated to use as a non-data page, e.g., to the lock
213 table, is always mapped to a frame.
214 2) A bufferfixed or io-fixed data page is always mapped to a frame.
215 3) When we need to map a block to a frame, we look from the list
216 awe_LRU_free_mapped and try to unmap its last block, but note that
217 bufferfixed or io-fixed pages cannot be unmapped.
218 4) For every frame in the buffer pool there is always a block whose page is
219 mapped to it. When we create the buffer pool, we map the first elements
220 in the free list to the frames.
221 5) When we have AWE enabled, we disable adaptive hash indexes.
224 /* Value in microseconds */
225 static const int WAIT_FOR_READ
= 20000;
227 /* Number of attempts made to read in a page in the buffer pool */
228 static const ulint BUF_PAGE_READ_MAX_RETRIES
= 100;
230 buf_pool_t
* buf_pool
= NULL
; /* The buffer buf_pool of the database */
233 ulint buf_dbg_counter
= 0; /* This is used to insert validation
234 operations in execution in the
236 ibool buf_debug_prints
= FALSE
; /* If this is set TRUE,
237 the program prints info whenever
238 read-ahead or flush occurs */
239 #endif /* UNIV_DEBUG */
240 /************************************************************************
241 Calculates a page checksum which is stored to the page when it is written
242 to a file. Note that we must be careful to calculate the same value on
243 32-bit and 64-bit architectures. */
246 buf_calc_page_new_checksum(
247 /*=======================*/
249 byte
* page
) /* in: buffer page */
253 /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
254 ..._ARCH_LOG_NO, are written outside the buffer pool to the first
255 pages of data files, we have to skip them in the page checksum
257 We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
258 checksum is stored, and also the last 8 bytes of page because
259 there we store the old formula checksum. */
261 checksum
= ut_fold_binary(page
+ FIL_PAGE_OFFSET
,
262 FIL_PAGE_FILE_FLUSH_LSN
- FIL_PAGE_OFFSET
)
263 + ut_fold_binary(page
+ FIL_PAGE_DATA
,
264 UNIV_PAGE_SIZE
- FIL_PAGE_DATA
265 - FIL_PAGE_END_LSN_OLD_CHKSUM
);
266 checksum
= checksum
& 0xFFFFFFFFUL
;
271 /************************************************************************
272 In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
273 looked at the first few bytes of the page. This calculates that old
275 NOTE: we must first store the new formula checksum to
276 FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
277 because this takes that field as an input! */
280 buf_calc_page_old_checksum(
281 /*=======================*/
283 byte
* page
) /* in: buffer page */
287 checksum
= ut_fold_binary(page
, FIL_PAGE_FILE_FLUSH_LSN
);
289 checksum
= checksum
& 0xFFFFFFFFUL
;
294 /************************************************************************
295 Checks if a page is corrupt. */
298 buf_page_is_corrupted(
299 /*==================*/
300 /* out: TRUE if corrupted */
301 byte
* read_buf
) /* in: a database page */
305 ulint checksum_field
;
306 ulint old_checksum_field
;
307 #ifndef UNIV_HOTBACKUP
310 if (mach_read_from_4(read_buf
+ FIL_PAGE_LSN
+ 4)
311 != mach_read_from_4(read_buf
+ UNIV_PAGE_SIZE
312 - FIL_PAGE_END_LSN_OLD_CHKSUM
+ 4)) {
314 /* Stored log sequence numbers at the start and the end
315 of page do not match */
320 #ifndef UNIV_HOTBACKUP
321 if (recv_lsn_checks_on
&& log_peek_lsn(¤t_lsn
)) {
322 if (ut_dulint_cmp(current_lsn
,
323 mach_read_from_8(read_buf
+ FIL_PAGE_LSN
))
325 ut_print_timestamp(stderr
);
328 " InnoDB: Error: page %lu log sequence number"
330 "InnoDB: is in the future! Current system "
331 "log sequence number %lu %lu.\n"
332 "InnoDB: Your database may be corrupt or "
333 "you may have copied the InnoDB\n"
334 "InnoDB: tablespace but not the InnoDB "
336 "InnoDB: http://dev.mysql.com/doc/refman/"
337 "5.1/en/forcing-innodb-recovery.html\n"
338 "InnoDB: for more information.\n",
339 (ulong
) mach_read_from_4(read_buf
341 (ulong
) ut_dulint_get_high
342 (mach_read_from_8(read_buf
+ FIL_PAGE_LSN
)),
343 (ulong
) ut_dulint_get_low
344 (mach_read_from_8(read_buf
+ FIL_PAGE_LSN
)),
345 (ulong
) ut_dulint_get_high(current_lsn
),
346 (ulong
) ut_dulint_get_low(current_lsn
));
351 /* If we use checksums validation, make additional check before
352 returning TRUE to ensure that the checksum is not equal to
353 BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
354 disabled. Otherwise, skip checksum calculation and return FALSE */
356 if (srv_use_checksums
) {
357 old_checksum
= buf_calc_page_old_checksum(read_buf
);
359 old_checksum_field
= mach_read_from_4(
360 read_buf
+ UNIV_PAGE_SIZE
361 - FIL_PAGE_END_LSN_OLD_CHKSUM
);
363 /* There are 2 valid formulas for old_checksum_field:
365 1. Very old versions of InnoDB only stored 8 byte lsn to the
366 start and the end of the page.
368 2. Newer InnoDB versions store the old formula checksum
371 if (old_checksum_field
!= mach_read_from_4(read_buf
373 && old_checksum_field
!= old_checksum
374 && old_checksum_field
!= BUF_NO_CHECKSUM_MAGIC
) {
379 checksum
= buf_calc_page_new_checksum(read_buf
);
380 checksum_field
= mach_read_from_4(read_buf
381 + FIL_PAGE_SPACE_OR_CHKSUM
);
383 /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
384 (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
386 if (checksum_field
!= 0 && checksum_field
!= checksum
387 && checksum_field
!= BUF_NO_CHECKSUM_MAGIC
) {
396 /************************************************************************
397 Prints a page to stderr. */
402 byte
* read_buf
) /* in: a database page */
408 ut_print_timestamp(stderr
);
409 fprintf(stderr
, " InnoDB: Page dump in ascii and hex (%lu bytes):\n",
410 (ulint
)UNIV_PAGE_SIZE
);
411 ut_print_buf(stderr
, read_buf
, UNIV_PAGE_SIZE
);
412 fputs("InnoDB: End of page dump\n", stderr
);
414 checksum
= srv_use_checksums
415 ? buf_calc_page_new_checksum(read_buf
) : BUF_NO_CHECKSUM_MAGIC
;
416 old_checksum
= srv_use_checksums
417 ? buf_calc_page_old_checksum(read_buf
) : BUF_NO_CHECKSUM_MAGIC
;
419 ut_print_timestamp(stderr
);
421 " InnoDB: Page checksum %lu, prior-to-4.0.14-form"
423 "InnoDB: stored checksum %lu, prior-to-4.0.14-form"
424 " stored checksum %lu\n"
425 "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
427 "InnoDB: Page number (if stored to page already) %lu,\n"
428 "InnoDB: space id (if created with >= MySQL-4.1.1"
429 " and stored already) %lu\n",
430 (ulong
) checksum
, (ulong
) old_checksum
,
431 (ulong
) mach_read_from_4(read_buf
+ FIL_PAGE_SPACE_OR_CHKSUM
),
432 (ulong
) mach_read_from_4(read_buf
+ UNIV_PAGE_SIZE
433 - FIL_PAGE_END_LSN_OLD_CHKSUM
),
434 (ulong
) mach_read_from_4(read_buf
+ FIL_PAGE_LSN
),
435 (ulong
) mach_read_from_4(read_buf
+ FIL_PAGE_LSN
+ 4),
436 (ulong
) mach_read_from_4(read_buf
+ UNIV_PAGE_SIZE
437 - FIL_PAGE_END_LSN_OLD_CHKSUM
+ 4),
438 (ulong
) mach_read_from_4(read_buf
+ FIL_PAGE_OFFSET
),
439 (ulong
) mach_read_from_4(read_buf
440 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
));
442 if (mach_read_from_2(read_buf
+ TRX_UNDO_PAGE_HDR
+ TRX_UNDO_PAGE_TYPE
)
443 == TRX_UNDO_INSERT
) {
445 "InnoDB: Page may be an insert undo log page\n");
446 } else if (mach_read_from_2(read_buf
+ TRX_UNDO_PAGE_HDR
447 + TRX_UNDO_PAGE_TYPE
)
448 == TRX_UNDO_UPDATE
) {
450 "InnoDB: Page may be an update undo log page\n");
453 switch (fil_page_get_type(read_buf
)) {
456 "InnoDB: Page may be an index page where"
457 " index id is %lu %lu\n",
458 (ulong
) ut_dulint_get_high
459 (btr_page_get_index_id(read_buf
)),
460 (ulong
) ut_dulint_get_low
461 (btr_page_get_index_id(read_buf
)));
463 /* If the code is in ibbackup, dict_sys may be uninitialized,
466 if (dict_sys
!= NULL
) {
468 index
= dict_index_find_on_id_low(
469 btr_page_get_index_id(read_buf
));
471 fputs("InnoDB: (", stderr
);
472 dict_index_name_print(stderr
, NULL
, index
);
473 fputs(")\n", stderr
);
478 fputs("InnoDB: Page may be an 'inode' page\n", stderr
);
480 case FIL_PAGE_IBUF_FREE_LIST
:
481 fputs("InnoDB: Page may be an insert buffer free list page\n",
484 case FIL_PAGE_TYPE_ALLOCATED
:
485 fputs("InnoDB: Page may be a freshly allocated page\n",
488 case FIL_PAGE_IBUF_BITMAP
:
489 fputs("InnoDB: Page may be an insert buffer bitmap page\n",
492 case FIL_PAGE_TYPE_SYS
:
493 fputs("InnoDB: Page may be a system page\n",
496 case FIL_PAGE_TYPE_TRX_SYS
:
497 fputs("InnoDB: Page may be a transaction system page\n",
500 case FIL_PAGE_TYPE_FSP_HDR
:
501 fputs("InnoDB: Page may be a file space header page\n",
504 case FIL_PAGE_TYPE_XDES
:
505 fputs("InnoDB: Page may be an extent descriptor page\n",
508 case FIL_PAGE_TYPE_BLOB
:
509 fputs("InnoDB: Page may be a BLOB page\n",
515 /************************************************************************
516 Initializes a buffer control block when the buf_pool is created. */
521 buf_block_t
* block
, /* in: pointer to control block */
522 byte
* frame
) /* in: pointer to buffer frame, or NULL if in
523 the case of AWE there is no frame */
527 block
->state
= BUF_BLOCK_NOT_USED
;
529 block
->frame
= frame
;
531 block
->awe_info
= NULL
;
533 block
->buf_fix_count
= 0;
536 block
->modify_clock
= ut_dulint_zero
;
538 block
->file_page_was_freed
= FALSE
;
540 block
->check_index_page_at_flush
= FALSE
;
543 block
->in_free_list
= FALSE
;
544 block
->in_LRU_list
= FALSE
;
546 block
->n_pointers
= 0;
548 mutex_create(&block
->mutex
, SYNC_BUF_BLOCK
);
550 rw_lock_create(&block
->lock
, SYNC_LEVEL_VARYING
);
551 ut_ad(rw_lock_validate(&(block
->lock
)));
553 #ifdef UNIV_SYNC_DEBUG
554 rw_lock_create(&block
->debug_latch
, SYNC_NO_ORDER_CHECK
);
555 #endif /* UNIV_SYNC_DEBUG */
558 /************************************************************************
559 Creates the buffer pool. */
564 /* out, own: buf_pool object, NULL if not
565 enough memory or error */
566 ulint max_size
, /* in: maximum size of the buf_pool in
568 ulint curr_size
, /* in: current size to use, must be <=
569 max_size, currently must be equal to
571 ulint n_frames
) /* in: number of frames; if AWE is used,
572 this is the size of the address space window
573 where physical memory pages are mapped; if
574 AWE is not used then this must be the same
581 ut_a(max_size
== curr_size
);
582 ut_a(srv_use_awe
|| n_frames
== max_size
);
584 if (n_frames
> curr_size
) {
586 "InnoDB: AWE: Error: you must specify in my.cnf"
587 " .._awe_mem_mb larger\n"
588 "InnoDB: than .._buffer_pool_size. Now the former"
590 "InnoDB: the latter %lu pages.\n",
591 (ulong
) curr_size
, (ulong
) n_frames
);
596 buf_pool
= mem_alloc(sizeof(buf_pool_t
));
598 /* 1. Initialize general fields
599 ---------------------------- */
600 mutex_create(&buf_pool
->mutex
, SYNC_BUF_POOL
);
602 mutex_enter(&(buf_pool
->mutex
));
605 /*----------------------------------------*/
606 /* Allocate the virtual address space window, i.e., the
607 buffer pool frames */
609 buf_pool
->frame_mem
= os_awe_allocate_virtual_mem_window(
610 UNIV_PAGE_SIZE
* (n_frames
+ 1));
612 /* Allocate the physical memory for AWE and the AWE info array
615 if ((curr_size
% ((1024 * 1024) / UNIV_PAGE_SIZE
)) != 0) {
618 "InnoDB: AWE: Error: physical memory must be"
619 " allocated in full megabytes.\n"
620 "InnoDB: Trying to allocate %lu"
621 " database pages.\n",
627 if (!os_awe_allocate_physical_mem(&(buf_pool
->awe_info
),
630 / UNIV_PAGE_SIZE
))) {
634 /*----------------------------------------*/
636 buf_pool
->frame_mem
= os_mem_alloc_large(
637 UNIV_PAGE_SIZE
* (n_frames
+ 1), FALSE
);
640 if (buf_pool
->frame_mem
== NULL
) {
645 buf_pool
->blocks
= ut_malloc(sizeof(buf_block_t
) * max_size
);
647 if (buf_pool
->blocks
== NULL
) {
652 buf_pool
->max_size
= max_size
;
653 buf_pool
->curr_size
= curr_size
;
655 buf_pool
->n_frames
= n_frames
;
657 /* Align pointer to the first frame */
659 frame
= ut_align(buf_pool
->frame_mem
, UNIV_PAGE_SIZE
);
661 buf_pool
->frame_zero
= frame
;
662 buf_pool
->high_end
= frame
+ UNIV_PAGE_SIZE
* n_frames
;
665 /*----------------------------------------*/
666 /* Map an initial part of the allocated physical memory to
669 os_awe_map_physical_mem_to_window(buf_pool
->frame_zero
,
672 / OS_AWE_X86_PAGE_SIZE
),
674 /*----------------------------------------*/
677 buf_pool
->blocks_of_frames
= ut_malloc(sizeof(void*) * n_frames
);
679 if (buf_pool
->blocks_of_frames
== NULL
) {
684 /* Init block structs and assign frames for them; in the case of
685 AWE there are less frames than blocks. Then we assign the frames
686 to the first blocks (we already mapped the memory above). We also
687 init the awe_info for every block. */
689 for (i
= 0; i
< max_size
; i
++) {
691 block
= buf_pool_get_nth_block(buf_pool
, i
);
694 frame
= buf_pool
->frame_zero
+ i
* UNIV_PAGE_SIZE
;
695 *(buf_pool
->blocks_of_frames
+ i
) = block
;
700 buf_block_init(block
, frame
);
703 /*----------------------------------------*/
704 block
->awe_info
= buf_pool
->awe_info
705 + i
* (UNIV_PAGE_SIZE
/ OS_AWE_X86_PAGE_SIZE
);
706 /*----------------------------------------*/
710 buf_pool
->page_hash
= hash_create(2 * max_size
);
712 buf_pool
->n_pend_reads
= 0;
714 buf_pool
->last_printout_time
= time(NULL
);
716 buf_pool
->n_pages_read
= 0;
717 buf_pool
->n_pages_written
= 0;
718 buf_pool
->n_pages_created
= 0;
719 buf_pool
->n_pages_awe_remapped
= 0;
721 buf_pool
->n_page_gets
= 0;
722 buf_pool
->n_page_gets_old
= 0;
723 buf_pool
->n_pages_read_old
= 0;
724 buf_pool
->n_pages_written_old
= 0;
725 buf_pool
->n_pages_created_old
= 0;
726 buf_pool
->n_pages_awe_remapped_old
= 0;
728 /* 2. Initialize flushing fields
729 ---------------------------- */
730 UT_LIST_INIT(buf_pool
->flush_list
);
732 for (i
= BUF_FLUSH_LRU
; i
<= BUF_FLUSH_LIST
; i
++) {
733 buf_pool
->n_flush
[i
] = 0;
734 buf_pool
->init_flush
[i
] = FALSE
;
735 buf_pool
->no_flush
[i
] = os_event_create(NULL
);
738 buf_pool
->LRU_flush_ended
= 0;
740 buf_pool
->ulint_clock
= 1;
741 buf_pool
->freed_page_clock
= 0;
743 /* 3. Initialize LRU fields
744 ---------------------------- */
745 UT_LIST_INIT(buf_pool
->LRU
);
747 buf_pool
->LRU_old
= NULL
;
749 UT_LIST_INIT(buf_pool
->awe_LRU_free_mapped
);
751 /* Add control blocks to the free list */
752 UT_LIST_INIT(buf_pool
->free
);
754 for (i
= 0; i
< curr_size
; i
++) {
756 block
= buf_pool_get_nth_block(buf_pool
, i
);
759 UNIV_MEM_INVALID(block
->frame
, UNIV_PAGE_SIZE
);
762 /* Add to the list of blocks mapped to
765 UT_LIST_ADD_LAST(awe_LRU_free_mapped
,
766 buf_pool
->awe_LRU_free_mapped
,
771 UT_LIST_ADD_LAST(free
, buf_pool
->free
, block
);
772 block
->in_free_list
= TRUE
;
775 mutex_exit(&(buf_pool
->mutex
));
777 if (srv_use_adaptive_hash_indexes
) {
778 btr_search_sys_create(curr_size
* UNIV_PAGE_SIZE
779 / sizeof(void*) / 64);
781 /* Create only a small dummy system */
782 btr_search_sys_create(1000);
788 /************************************************************************
789 Maps the page of block to a frame, if not mapped yet. Unmaps some page
790 from the end of the awe_LRU_free_mapped. */
793 buf_awe_map_page_to_frame(
794 /*======================*/
795 buf_block_t
* block
, /* in: block whose page should be
797 ibool add_to_mapped_list
) /* in: TRUE if we in the case
798 we need to map the page should also
800 awe_LRU_free_mapped list */
804 ut_ad(mutex_own(&(buf_pool
->mutex
)));
812 /* Scan awe_LRU_free_mapped from the end and try to find a block
813 which is not bufferfixed or io-fixed */
815 bck
= UT_LIST_GET_LAST(buf_pool
->awe_LRU_free_mapped
);
820 mutex_enter(&bck
->mutex
);
822 skip
= (bck
->state
== BUF_BLOCK_FILE_PAGE
823 && (bck
->buf_fix_count
!= 0 || bck
->io_fix
!= 0));
826 mutex_exit(&bck
->mutex
);
828 /* We have to skip this */
829 bck
= UT_LIST_GET_PREV(awe_LRU_free_mapped
, bck
);
831 /* We can map block to the frame of bck */
833 os_awe_map_physical_mem_to_window(
835 UNIV_PAGE_SIZE
/ OS_AWE_X86_PAGE_SIZE
,
838 block
->frame
= bck
->frame
;
840 *(buf_pool
->blocks_of_frames
841 + (((ulint
)(block
->frame
842 - buf_pool
->frame_zero
))
843 >> UNIV_PAGE_SIZE_SHIFT
))
847 UT_LIST_REMOVE(awe_LRU_free_mapped
,
848 buf_pool
->awe_LRU_free_mapped
,
851 if (add_to_mapped_list
) {
854 buf_pool
->awe_LRU_free_mapped
,
858 buf_pool
->n_pages_awe_remapped
++;
860 mutex_exit(&bck
->mutex
);
867 "InnoDB: AWE: Fatal error: cannot find a page to unmap\n"
868 "InnoDB: awe_LRU_free_mapped list length %lu\n",
869 (ulong
) UT_LIST_GET_LEN(buf_pool
->awe_LRU_free_mapped
));
874 /************************************************************************
875 Allocates a buffer block. */
878 buf_block_alloc(void)
879 /*=================*/
880 /* out, own: the allocated block; also if AWE
881 is used it is guaranteed that the page is
886 block
= buf_LRU_get_free_block();
891 /************************************************************************
892 Moves the block to the start of the LRU list if there is a danger
893 that the block would drift out of the buffer pool. */
896 buf_block_make_young(
897 /*=================*/
898 buf_block_t
* block
) /* in: block to make younger */
900 ut_ad(!mutex_own(&(buf_pool
->mutex
)));
902 /* Note that we read freed_page_clock's without holding any mutex:
903 this is allowed since the result is used only in heuristics */
905 if (buf_block_peek_if_too_old(block
)) {
907 mutex_enter(&buf_pool
->mutex
);
908 /* There has been freeing activity in the LRU list:
909 best to move to the head of the LRU list */
911 buf_LRU_make_block_young(block
);
912 mutex_exit(&buf_pool
->mutex
);
916 /************************************************************************
917 Moves a page to the start of the buffer pool LRU list. This high-level
918 function can be used to prevent an important page from slipping out of
924 buf_frame_t
* frame
) /* in: buffer frame of a file page */
928 mutex_enter(&(buf_pool
->mutex
));
930 block
= buf_block_align(frame
);
932 ut_a(block
->state
== BUF_BLOCK_FILE_PAGE
);
934 buf_LRU_make_block_young(block
);
936 mutex_exit(&(buf_pool
->mutex
));
939 /************************************************************************
940 Frees a buffer block which does not contain a file page. */
945 buf_block_t
* block
) /* in, own: block to be freed */
947 mutex_enter(&(buf_pool
->mutex
));
949 mutex_enter(&block
->mutex
);
951 ut_a(block
->state
!= BUF_BLOCK_FILE_PAGE
);
953 buf_LRU_block_free_non_file_page(block
);
955 mutex_exit(&block
->mutex
);
957 mutex_exit(&(buf_pool
->mutex
));
960 /*************************************************************************
961 Allocates a buffer frame. */
964 buf_frame_alloc(void)
965 /*=================*/
966 /* out: buffer frame */
968 return(buf_block_alloc()->frame
);
971 /*************************************************************************
972 Frees a buffer frame which does not contain a file page. */
977 buf_frame_t
* frame
) /* in: buffer frame */
979 buf_block_free(buf_block_align(frame
));
982 /************************************************************************
983 Returns the buffer control block if the page can be found in the buffer
984 pool. NOTE that it is possible that the page is not yet read
985 from disk, though. This is a very low-level function: use with care! */
990 /* out: control block if found from page hash table,
991 otherwise NULL; NOTE that the page is not necessarily
992 yet read from disk! */
993 ulint space
, /* in: space id */
994 ulint offset
) /* in: page number */
998 mutex_enter_fast(&(buf_pool
->mutex
));
1000 block
= buf_page_hash_get(space
, offset
);
1002 mutex_exit(&(buf_pool
->mutex
));
1007 /************************************************************************
1008 Returns the current state of is_hashed of a page. FALSE if the page is
1009 not in the pool. NOTE that this operation does not fix the page in the
1010 pool if it is found there. */
1013 buf_page_peek_if_search_hashed(
1014 /*===========================*/
1015 /* out: TRUE if page hash index is built in search
1017 ulint space
, /* in: space id */
1018 ulint offset
) /* in: page number */
1023 mutex_enter_fast(&(buf_pool
->mutex
));
1025 block
= buf_page_hash_get(space
, offset
);
1030 is_hashed
= block
->is_hashed
;
1033 mutex_exit(&(buf_pool
->mutex
));
1038 /************************************************************************
1039 Returns TRUE if the page can be found in the buffer pool hash table. NOTE
1040 that it is possible that the page is not yet read from disk, though. */
1045 /* out: TRUE if found from page hash table,
1046 NOTE that the page is not necessarily yet read
1048 ulint space
, /* in: space id */
1049 ulint offset
) /* in: page number */
1051 if (buf_page_peek_block(space
, offset
)) {
1059 /************************************************************************
1060 Sets file_page_was_freed TRUE if the page is found in the buffer pool.
1061 This function should be called when we free a file page and want the
1062 debug version to check that it is not accessed any more unless
1066 buf_page_set_file_page_was_freed(
1067 /*=============================*/
1068 /* out: control block if found from page hash table,
1070 ulint space
, /* in: space id */
1071 ulint offset
) /* in: page number */
1075 mutex_enter_fast(&(buf_pool
->mutex
));
1077 block
= buf_page_hash_get(space
, offset
);
1080 block
->file_page_was_freed
= TRUE
;
1083 mutex_exit(&(buf_pool
->mutex
));
1088 /************************************************************************
1089 Sets file_page_was_freed FALSE if the page is found in the buffer pool.
1090 This function should be called when we free a file page and want the
1091 debug version to check that it is not accessed any more unless
1095 buf_page_reset_file_page_was_freed(
1096 /*===============================*/
1097 /* out: control block if found from page hash table,
1099 ulint space
, /* in: space id */
1100 ulint offset
) /* in: page number */
1104 mutex_enter_fast(&(buf_pool
->mutex
));
1106 block
= buf_page_hash_get(space
, offset
);
1109 block
->file_page_was_freed
= FALSE
;
1112 mutex_exit(&(buf_pool
->mutex
));
1117 /************************************************************************
1118 This is the general function used to get access to a database page. */
1123 /* out: pointer to the frame or NULL */
1124 ulint space
, /* in: space id */
1125 ulint offset
, /* in: page number */
1126 ulint rw_latch
,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
1127 buf_frame_t
* guess
, /* in: guessed frame or NULL */
1128 ulint mode
, /* in: BUF_GET, BUF_GET_IF_IN_POOL,
1129 BUF_GET_NO_LATCH, BUF_GET_NOWAIT */
1130 const char* file
, /* in: file name */
1131 ulint line
, /* in: line where called */
1132 mtr_t
* mtr
) /* in: mini-transaction */
1142 ut_ad((rw_latch
== RW_S_LATCH
)
1143 || (rw_latch
== RW_X_LATCH
)
1144 || (rw_latch
== RW_NO_LATCH
));
1145 ut_ad((mode
!= BUF_GET_NO_LATCH
) || (rw_latch
== RW_NO_LATCH
));
1146 ut_ad((mode
== BUF_GET
) || (mode
== BUF_GET_IF_IN_POOL
)
1147 || (mode
== BUF_GET_NO_LATCH
) || (mode
== BUF_GET_NOWAIT
));
1148 #ifndef UNIV_LOG_DEBUG
1149 ut_ad(!ibuf_inside() || ibuf_page(space
, offset
));
1151 buf_pool
->n_page_gets
++;
1154 mutex_enter_fast(&(buf_pool
->mutex
));
1157 block
= buf_block_align(guess
);
1159 if ((offset
!= block
->offset
) || (space
!= block
->space
)
1160 || (block
->state
!= BUF_BLOCK_FILE_PAGE
)) {
1166 if (block
== NULL
) {
1167 block
= buf_page_hash_get(space
, offset
);
1170 if (block
== NULL
) {
1171 /* Page not in buf_pool: needs to be read from file */
1173 mutex_exit(&(buf_pool
->mutex
));
1175 if (mode
== BUF_GET_IF_IN_POOL
) {
1180 if (buf_read_page(space
, offset
)) {
1182 } else if (retries
< BUF_PAGE_READ_MAX_RETRIES
) {
1185 fprintf(stderr
, "InnoDB: Error: Unable"
1186 " to read tablespace %lu page no"
1187 " %lu into the buffer pool after"
1189 "InnoDB: The most probable cause"
1190 " of this error may be that the"
1191 " table has been corrupted.\n"
1192 "InnoDB: You can try to fix this"
1194 " innodb_force_recovery.\n"
1195 "InnoDB: Please see reference manual"
1196 " for more details.\n"
1197 "InnoDB: Aborting...\n",
1199 BUF_PAGE_READ_MAX_RETRIES
);
1207 if (buf_dbg_counter
% 37 == 0) {
1208 ut_ad(buf_validate());
1214 mutex_enter(&block
->mutex
);
1216 ut_a(block
->state
== BUF_BLOCK_FILE_PAGE
);
1220 if (block
->io_fix
== BUF_IO_READ
) {
1224 if (mode
== BUF_GET_IF_IN_POOL
) {
1225 /* The page is only being read to buffer */
1226 mutex_exit(&buf_pool
->mutex
);
1227 mutex_exit(&block
->mutex
);
1233 /* If AWE is enabled and the page is not mapped to a frame, then
1236 if (block
->frame
== NULL
) {
1239 /* We set second parameter TRUE because the block is in the
1240 LRU list and we must put it to awe_LRU_free_mapped list once
1241 mapped to a frame */
1243 buf_awe_map_page_to_frame(block
, TRUE
);
1246 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
1247 if (mode
== BUF_GET_IF_IN_POOL
&& ibuf_debug
) {
1248 /* Try to evict the block from the buffer pool, to use the
1249 insert buffer as much as possible. */
1251 if (buf_LRU_free_block(block
)) {
1252 mutex_exit(&buf_pool
->mutex
);
1253 mutex_exit(&block
->mutex
);
1255 "innodb_change_buffering_debug evict %u %u\n",
1256 (unsigned) space
, (unsigned) offset
);
1258 } else if (buf_flush_page_try(block
)) {
1260 "innodb_change_buffering_debug flush %u %u\n",
1261 (unsigned) space
, (unsigned) offset
);
1262 guess
= block
->frame
;
1266 /* Failed to evict the page; change it directly */
1268 #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
1270 #ifdef UNIV_SYNC_DEBUG
1271 buf_block_buf_fix_inc_debug(block
, file
, line
);
1273 buf_block_buf_fix_inc(block
);
1275 mutex_exit(&buf_pool
->mutex
);
1277 /* Check if this is the first access to the page */
1279 accessed
= block
->accessed
;
1281 block
->accessed
= TRUE
;
1283 mutex_exit(&block
->mutex
);
1285 buf_block_make_young(block
);
1287 #ifdef UNIV_DEBUG_FILE_ACCESSES
1288 ut_a(block
->file_page_was_freed
== FALSE
);
1294 if (buf_dbg_counter
% 5771 == 0) {
1295 ut_ad(buf_validate());
1298 ut_ad(block
->buf_fix_count
> 0);
1299 ut_ad(block
->state
== BUF_BLOCK_FILE_PAGE
);
1301 if (mode
== BUF_GET_NOWAIT
) {
1302 if (rw_latch
== RW_S_LATCH
) {
1303 success
= rw_lock_s_lock_func_nowait(&(block
->lock
),
1305 fix_type
= MTR_MEMO_PAGE_S_FIX
;
1307 ut_ad(rw_latch
== RW_X_LATCH
);
1308 success
= rw_lock_x_lock_func_nowait(&(block
->lock
),
1310 fix_type
= MTR_MEMO_PAGE_X_FIX
;
1314 mutex_enter(&block
->mutex
);
1316 block
->buf_fix_count
--;
1318 mutex_exit(&block
->mutex
);
1319 #ifdef UNIV_SYNC_DEBUG
1320 rw_lock_s_unlock(&(block
->debug_latch
));
1325 } else if (rw_latch
== RW_NO_LATCH
) {
1328 /* Let us wait until the read operation
1332 mutex_enter(&block
->mutex
);
1334 if (block
->io_fix
== BUF_IO_READ
) {
1336 mutex_exit(&block
->mutex
);
1338 os_thread_sleep(WAIT_FOR_READ
);
1341 mutex_exit(&block
->mutex
);
1348 fix_type
= MTR_MEMO_BUF_FIX
;
1349 } else if (rw_latch
== RW_S_LATCH
) {
1351 rw_lock_s_lock_func(&(block
->lock
), 0, file
, line
);
1353 fix_type
= MTR_MEMO_PAGE_S_FIX
;
1355 rw_lock_x_lock_func(&(block
->lock
), 0, file
, line
);
1357 fix_type
= MTR_MEMO_PAGE_X_FIX
;
1360 mtr_memo_push(mtr
, block
, fix_type
);
1363 /* In the case of a first access, try to apply linear
1366 buf_read_ahead_linear(space
, offset
);
1369 #ifdef UNIV_IBUF_DEBUG
1370 ut_a(ibuf_count_get(block
->space
, block
->offset
) == 0);
1372 return(block
->frame
);
1375 /************************************************************************
1376 This is the general function used to get optimistic access to a database
page. */
1380 buf_page_optimistic_get_func(
1381 /*=========================*/
1382 /* out: TRUE if success */
1383 ulint rw_latch
,/* in: RW_S_LATCH, RW_X_LATCH */
1384 buf_block_t
* block
, /* in: guessed buffer block */
1385 buf_frame_t
* guess
, /* in: guessed frame; note that AWE may move
1387 dulint modify_clock
,/* in: modify clock value if mode is
1388 ..._GUESS_ON_CLOCK */
1389 const char* file
, /* in: file name */
1390 ulint line
, /* in: line where called */
1391 mtr_t
* mtr
) /* in: mini-transaction */
1397 ut_ad(mtr
&& block
);
1398 ut_ad((rw_latch
== RW_S_LATCH
) || (rw_latch
== RW_X_LATCH
));
1400 /* If AWE is used, block may have a different frame now, e.g., NULL */
1402 mutex_enter(&block
->mutex
);
1404 if (UNIV_UNLIKELY(block
->state
!= BUF_BLOCK_FILE_PAGE
)
1405 || UNIV_UNLIKELY(block
->frame
!= guess
)) {
1407 mutex_exit(&block
->mutex
);
1412 #ifdef UNIV_SYNC_DEBUG
1413 buf_block_buf_fix_inc_debug(block
, file
, line
);
1415 buf_block_buf_fix_inc(block
);
1417 accessed
= block
->accessed
;
1418 block
->accessed
= TRUE
;
1420 mutex_exit(&block
->mutex
);
1422 buf_block_make_young(block
);
1424 /* Check if this is the first access to the page */
1426 ut_ad(!ibuf_inside() || ibuf_page(block
->space
, block
->offset
));
1428 if (rw_latch
== RW_S_LATCH
) {
1429 success
= rw_lock_s_lock_func_nowait(&(block
->lock
),
1431 fix_type
= MTR_MEMO_PAGE_S_FIX
;
1433 success
= rw_lock_x_lock_func_nowait(&(block
->lock
),
1435 fix_type
= MTR_MEMO_PAGE_X_FIX
;
1438 if (UNIV_UNLIKELY(!success
)) {
1439 mutex_enter(&block
->mutex
);
1441 block
->buf_fix_count
--;
1443 mutex_exit(&block
->mutex
);
1445 #ifdef UNIV_SYNC_DEBUG
1446 rw_lock_s_unlock(&(block
->debug_latch
));
1451 if (UNIV_UNLIKELY(!UT_DULINT_EQ(modify_clock
, block
->modify_clock
))) {
1452 #ifdef UNIV_SYNC_DEBUG
1453 buf_page_dbg_add_level(block
->frame
, SYNC_NO_ORDER_CHECK
);
1454 #endif /* UNIV_SYNC_DEBUG */
1455 if (rw_latch
== RW_S_LATCH
) {
1456 rw_lock_s_unlock(&(block
->lock
));
1458 rw_lock_x_unlock(&(block
->lock
));
1461 mutex_enter(&block
->mutex
);
1463 block
->buf_fix_count
--;
1465 mutex_exit(&block
->mutex
);
1467 #ifdef UNIV_SYNC_DEBUG
1468 rw_lock_s_unlock(&(block
->debug_latch
));
1473 mtr_memo_push(mtr
, block
, fix_type
);
1478 if (buf_dbg_counter
% 5771 == 0) {
1479 ut_ad(buf_validate());
1482 ut_ad(block
->buf_fix_count
> 0);
1483 ut_ad(block
->state
== BUF_BLOCK_FILE_PAGE
);
1485 #ifdef UNIV_DEBUG_FILE_ACCESSES
1486 ut_a(block
->file_page_was_freed
== FALSE
);
1488 if (UNIV_UNLIKELY(!accessed
)) {
1489 /* In the case of a first access, try to apply linear
1492 buf_read_ahead_linear(buf_frame_get_space_id(guess
),
1493 buf_frame_get_page_no(guess
));
1496 #ifdef UNIV_IBUF_DEBUG
1497 ut_a(ibuf_count_get(block
->space
, block
->offset
) == 0);
1499 buf_pool
->n_page_gets
++;
1504 /************************************************************************
1505 This is used to get access to a known database page, when no waiting can be
1506 done. For example, if a search in an adaptive hash index leads us to this
frame. */
1510 buf_page_get_known_nowait(
1511 /*======================*/
1512 /* out: TRUE if success */
1513 ulint rw_latch
,/* in: RW_S_LATCH, RW_X_LATCH */
1514 buf_frame_t
* guess
, /* in: the known page frame */
1515 ulint mode
, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
1516 const char* file
, /* in: file name */
1517 ulint line
, /* in: line where called */
1518 mtr_t
* mtr
) /* in: mini-transaction */
1525 ut_ad((rw_latch
== RW_S_LATCH
) || (rw_latch
== RW_X_LATCH
));
1527 block
= buf_block_align(guess
);
1529 mutex_enter(&block
->mutex
);
1531 if (block
->state
== BUF_BLOCK_REMOVE_HASH
) {
1532 /* Another thread is just freeing the block from the LRU list
1533 of the buffer pool: do not try to access this page; this
1534 attempt to access the page can only come through the hash
1535 index because when the buffer block state is ..._REMOVE_HASH,
1536 we have already removed it from the page address hash table
1537 of the buffer pool. */
1539 mutex_exit(&block
->mutex
);
1544 ut_a(block
->state
== BUF_BLOCK_FILE_PAGE
);
1546 #ifdef UNIV_SYNC_DEBUG
1547 buf_block_buf_fix_inc_debug(block
, file
, line
);
1549 buf_block_buf_fix_inc(block
);
1551 mutex_exit(&block
->mutex
);
1553 if (mode
== BUF_MAKE_YOUNG
) {
1554 buf_block_make_young(block
);
1557 ut_ad(!ibuf_inside() || (mode
== BUF_KEEP_OLD
));
1559 if (rw_latch
== RW_S_LATCH
) {
1560 success
= rw_lock_s_lock_func_nowait(&(block
->lock
),
1562 fix_type
= MTR_MEMO_PAGE_S_FIX
;
1564 success
= rw_lock_x_lock_func_nowait(&(block
->lock
),
1566 fix_type
= MTR_MEMO_PAGE_X_FIX
;
1570 mutex_enter(&block
->mutex
);
1572 block
->buf_fix_count
--;
1574 mutex_exit(&block
->mutex
);
1576 #ifdef UNIV_SYNC_DEBUG
1577 rw_lock_s_unlock(&(block
->debug_latch
));
1583 mtr_memo_push(mtr
, block
, fix_type
);
1588 if (buf_dbg_counter
% 5771 == 0) {
1589 ut_ad(buf_validate());
1592 ut_ad(block
->buf_fix_count
> 0);
1593 ut_ad(block
->state
== BUF_BLOCK_FILE_PAGE
);
1594 #ifdef UNIV_DEBUG_FILE_ACCESSES
1595 ut_a(block
->file_page_was_freed
== FALSE
);
1598 #ifdef UNIV_IBUF_DEBUG
1599 ut_a((mode
== BUF_KEEP_OLD
)
1600 || (ibuf_count_get(block
->space
, block
->offset
) == 0));
1602 buf_pool
->n_page_gets
++;
1607 /************************************************************************
1608 Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
1611 buf_page_init_for_backup_restore(
1612 /*=============================*/
1613 ulint space
, /* in: space id */
1614 ulint offset
, /* in: offset of the page within space
1615 in units of a page */
1616 buf_block_t
* block
) /* in: block to init */
1618 /* Set the state of the block */
1619 block
->magic_n
= BUF_BLOCK_MAGIC_N
;
1621 block
->state
= BUF_BLOCK_FILE_PAGE
;
1622 block
->space
= space
;
1623 block
->offset
= offset
;
1625 block
->lock_hash_val
= 0;
1627 block
->freed_page_clock
= 0;
1629 block
->newest_modification
= ut_dulint_zero
;
1630 block
->oldest_modification
= ut_dulint_zero
;
1632 block
->accessed
= FALSE
;
1633 block
->buf_fix_count
= 0;
1636 block
->n_hash_helps
= 0;
1637 block
->is_hashed
= FALSE
;
1638 block
->n_fields
= 1;
1640 block
->left_side
= TRUE
;
1642 block
->file_page_was_freed
= FALSE
;
1645 /************************************************************************
1646 Inits a page to the buffer buf_pool. */
1651 ulint space
, /* in: space id */
1652 ulint offset
, /* in: offset of the page within space
1653 in units of a page */
1654 buf_block_t
* block
) /* in: block to init */
1657 ut_ad(mutex_own(&(buf_pool
->mutex
)));
1658 ut_ad(mutex_own(&(block
->mutex
)));
1659 ut_a(block
->state
!= BUF_BLOCK_FILE_PAGE
);
1661 /* Set the state of the block */
1662 block
->magic_n
= BUF_BLOCK_MAGIC_N
;
1664 block
->state
= BUF_BLOCK_FILE_PAGE
;
1665 block
->space
= space
;
1666 block
->offset
= offset
;
1668 block
->check_index_page_at_flush
= FALSE
;
1669 block
->index
= NULL
;
1671 block
->lock_hash_val
= lock_rec_hash(space
, offset
);
1673 #ifdef UNIV_DEBUG_VALGRIND
1675 /* Silence valid Valgrind warnings about uninitialized
1676 data being written to data files. There are some unused
1677 bytes on some pages that InnoDB does not initialize. */
1678 UNIV_MEM_VALID(block
->frame
, UNIV_PAGE_SIZE
);
1680 #endif /* UNIV_DEBUG_VALGRIND */
1682 /* Insert into the hash table of file pages */
1684 if (buf_page_hash_get(space
, offset
)) {
1686 "InnoDB: Error: page %lu %lu already found"
1687 " in the hash table\n",
1695 #endif /* UNIV_DEBUG */
1699 HASH_INSERT(buf_block_t
, hash
, buf_pool
->page_hash
,
1700 buf_page_address_fold(space
, offset
), block
);
1702 block
->freed_page_clock
= 0;
1704 block
->newest_modification
= ut_dulint_zero
;
1705 block
->oldest_modification
= ut_dulint_zero
;
1707 block
->accessed
= FALSE
;
1708 block
->buf_fix_count
= 0;
1711 block
->n_hash_helps
= 0;
1712 block
->is_hashed
= FALSE
;
1713 block
->n_fields
= 1;
1715 block
->left_side
= TRUE
;
1717 block
->file_page_was_freed
= FALSE
;
1720 /************************************************************************
1721 Function which inits a page for read to the buffer buf_pool. If the page is
1722 (1) already in buf_pool, or
1723 (2) if we specify to read only ibuf pages and the page is not an ibuf page, or
1724 (3) if the space is deleted or being deleted,
1725 then this function does nothing.
1726 Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
1727 on the buffer frame. The io-handler must take care that the flag is cleared
1728 and the lock released later. This is one of the functions which perform the
1729 state transition NOT_USED => FILE_PAGE to a block (the other is
1730 buf_page_create). */
1733 buf_page_init_for_read(
1734 /*===================*/
1735 /* out: pointer to the block or NULL */
1736 ulint
* err
, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED */
1737 ulint mode
, /* in: BUF_READ_IBUF_PAGES_ONLY, ... */
1738 ulint space
, /* in: space id */
1739 ib_longlong tablespace_version
,/* in: prevents reading from a wrong
1740 version of the tablespace in case we have done
1742 ulint offset
) /* in: page number */
1751 if (mode
== BUF_READ_IBUF_PAGES_ONLY
) {
1752 /* It is a read-ahead within an ibuf routine */
1754 ut_ad(!ibuf_bitmap_page(offset
));
1755 ut_ad(ibuf_inside());
1759 if (!ibuf_page_low(space
, offset
, &mtr
)) {
1766 ut_ad(mode
== BUF_READ_ANY_PAGE
);
1769 block
= buf_block_alloc();
1773 mutex_enter(&(buf_pool
->mutex
));
1774 mutex_enter(&block
->mutex
);
1776 if (fil_tablespace_deleted_or_being_deleted_in_mem(
1777 space
, tablespace_version
)) {
1778 *err
= DB_TABLESPACE_DELETED
;
1781 if (*err
== DB_TABLESPACE_DELETED
1782 || NULL
!= buf_page_hash_get(space
, offset
)) {
1784 /* The page belongs to a space which has been
1785 deleted or is being deleted, or the page is
1786 already in buf_pool, return */
1788 mutex_exit(&block
->mutex
);
1789 mutex_exit(&(buf_pool
->mutex
));
1791 buf_block_free(block
);
1793 if (mode
== BUF_READ_IBUF_PAGES_ONLY
) {
1803 buf_page_init(space
, offset
, block
);
1805 /* The block must be put to the LRU list, to the old blocks */
1807 buf_LRU_add_block(block
, TRUE
); /* TRUE == to old blocks */
1809 block
->io_fix
= BUF_IO_READ
;
1811 buf_pool
->n_pend_reads
++;
1813 /* We set a pass-type x-lock on the frame because then the same
1814 thread which called for the read operation (and is running now at
1815 this point of code) can wait for the read to complete by waiting
1816 for the x-lock on the frame; if the x-lock were recursive, the
1817 same thread would illegally get the x-lock before the page read
1818 is completed. The x-lock is cleared by the io-handler thread. */
1820 rw_lock_x_lock_gen(&(block
->lock
), BUF_IO_READ
);
1822 mutex_exit(&block
->mutex
);
1823 mutex_exit(&(buf_pool
->mutex
));
1825 if (mode
== BUF_READ_IBUF_PAGES_ONLY
) {
1833 /************************************************************************
1834 Initializes a page to the buffer buf_pool. The page is usually not read
1835 from a file even if it cannot be found in the buffer buf_pool. This is one
1836 of the functions which perform to a block a state transition NOT_USED =>
1837 FILE_PAGE (the other is buf_page_init_for_read above). */
1842 /* out: pointer to the frame, page bufferfixed */
1843 ulint space
, /* in: space id */
1844 ulint offset
, /* in: offset of the page within space in units of
1846 mtr_t
* mtr
) /* in: mini-transaction handle */
1850 buf_block_t
* free_block
= NULL
;
1854 free_block
= buf_LRU_get_free_block();
1856 mutex_enter(&(buf_pool
->mutex
));
1858 block
= buf_page_hash_get(space
, offset
);
1860 if (block
!= NULL
) {
1861 #ifdef UNIV_IBUF_DEBUG
1862 ut_a(ibuf_count_get(block
->space
, block
->offset
) == 0);
1864 block
->file_page_was_freed
= FALSE
;
1866 /* Page can be found in buf_pool */
1867 mutex_exit(&(buf_pool
->mutex
));
1869 buf_block_free(free_block
);
1871 frame
= buf_page_get_with_no_latch(space
, offset
, mtr
);
1876 /* If we get here, the page was not in buf_pool: init it there */
1879 if (buf_debug_prints
) {
1880 fprintf(stderr
, "Creating space %lu page %lu to buffer\n",
1881 (ulong
) space
, (ulong
) offset
);
1883 #endif /* UNIV_DEBUG */
1887 mutex_enter(&block
->mutex
);
1889 buf_page_init(space
, offset
, block
);
1891 /* The block must be put to the LRU list */
1892 buf_LRU_add_block(block
, FALSE
);
1894 #ifdef UNIV_SYNC_DEBUG
1895 buf_block_buf_fix_inc_debug(block
, __FILE__
, __LINE__
);
1897 buf_block_buf_fix_inc(block
);
1899 buf_pool
->n_pages_created
++;
1901 mutex_exit(&(buf_pool
->mutex
));
1903 mtr_memo_push(mtr
, block
, MTR_MEMO_BUF_FIX
);
1905 block
->accessed
= TRUE
;
1907 mutex_exit(&block
->mutex
);
1909 /* Delete possible entries for the page from the insert buffer:
1910 such can exist if the page belonged to an index which was dropped */
1912 ibuf_merge_or_delete_for_page(NULL
, space
, offset
, TRUE
);
1914 /* Flush pages from the end of the LRU list if necessary */
1915 buf_flush_free_margin();
1917 frame
= block
->frame
;
1919 memset(frame
+ FIL_PAGE_PREV
, 0xff, 4);
1920 memset(frame
+ FIL_PAGE_NEXT
, 0xff, 4);
1921 mach_write_to_2(frame
+ FIL_PAGE_TYPE
, FIL_PAGE_TYPE_ALLOCATED
);
1923 /* Reset to zero the file flush lsn field in the page; if the first
1924 page of an ibdata file is 'created' in this function into the buffer
1925 pool then we lose the original contents of the file flush lsn stamp.
1926 Then InnoDB could in a crash recovery print a big, false, corruption
1927 warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
1929 memset(frame
+ FIL_PAGE_FILE_FLUSH_LSN
, 0, 8);
1934 if (buf_dbg_counter
% 357 == 0) {
1935 ut_ad(buf_validate());
1938 #ifdef UNIV_IBUF_DEBUG
1939 ut_a(ibuf_count_get(block
->space
, block
->offset
) == 0);
1944 /************************************************************************
1945 Completes an asynchronous read or write request of a file page to or from
the buffer pool. */
1949 buf_page_io_complete(
1950 /*=================*/
1951 buf_block_t
* block
) /* in: pointer to the block in question */
1957 ut_a(block
->state
== BUF_BLOCK_FILE_PAGE
);
1959 /* We do not need protect block->io_fix here by block->mutex to read
1960 it because this is the only function where we can change the value
1961 from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
1962 ensures that this is the only thread that handles the i/o for this
1965 io_type
= block
->io_fix
;
1967 if (io_type
== BUF_IO_READ
) {
1968 /* If this page is not uninitialized and not in the
1969 doublewrite buffer, then the page number and space id
1970 should be the same as in block. */
1971 ulint read_page_no
= mach_read_from_4(
1972 block
->frame
+ FIL_PAGE_OFFSET
);
1973 ulint read_space_id
= mach_read_from_4(
1974 block
->frame
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
);
1977 && trx_doublewrite_page_inside(block
->offset
)) {
1979 ut_print_timestamp(stderr
);
1981 " InnoDB: Error: reading page %lu\n"
1982 "InnoDB: which is in the"
1983 " doublewrite buffer!\n",
1984 (ulong
) block
->offset
);
1985 } else if (!read_space_id
&& !read_page_no
) {
1986 /* This is likely an uninitialized page. */
1987 } else if ((block
->space
&& block
->space
!= read_space_id
)
1988 || block
->offset
!= read_page_no
) {
1989 /* We did not compare space_id to read_space_id
1990 if block->space == 0, because the field on the
1991 page may contain garbage in MySQL < 4.1.1,
1992 which only supported block->space == 0. */
1994 ut_print_timestamp(stderr
);
1996 " InnoDB: Error: space id and page n:o"
1997 " stored in the page\n"
1998 "InnoDB: read in are %lu:%lu,"
1999 " should be %lu:%lu!\n",
2000 (ulong
) read_space_id
, (ulong
) read_page_no
,
2001 (ulong
) block
->space
, (ulong
) block
->offset
);
2003 /* From version 3.23.38 up we store the page checksum
2004 to the 4 first bytes of the page end lsn field */
2006 if (buf_page_is_corrupted(block
->frame
)) {
2008 "InnoDB: Database page corruption on disk"
2010 "InnoDB: file read of page %lu.\n",
2011 (ulong
) block
->offset
);
2013 fputs("InnoDB: You may have to recover"
2014 " from a backup.\n", stderr
);
2016 buf_page_print(block
->frame
);
2019 "InnoDB: Database page corruption on disk"
2021 "InnoDB: file read of page %lu.\n",
2022 (ulong
) block
->offset
);
2023 fputs("InnoDB: You may have to recover"
2024 " from a backup.\n", stderr
);
2025 fputs("InnoDB: It is also possible that"
2027 "InnoDB: system has corrupted its"
2029 "InnoDB: and rebooting your computer"
2032 "InnoDB: If the corrupt page is an index page\n"
2033 "InnoDB: you can also try to"
2034 " fix the corruption\n"
2035 "InnoDB: by dumping, dropping,"
2036 " and reimporting\n"
2037 "InnoDB: the corrupt table."
2038 " You can use CHECK\n"
2039 "InnoDB: TABLE to scan your"
2040 " table for corruption.\n"
2042 " http://dev.mysql.com/doc/refman/5.1/en/"
2043 "forcing-innodb-recovery.html\n"
2044 "InnoDB: about forcing recovery.\n", stderr
);
2046 if (srv_force_recovery
< SRV_FORCE_IGNORE_CORRUPT
) {
2047 fputs("InnoDB: Ending processing because of"
2048 " a corrupt database page.\n",
2054 if (recv_recovery_is_on()) {
2055 recv_recover_page(FALSE
, TRUE
, block
->frame
,
2056 block
->space
, block
->offset
);
2059 if (!recv_no_ibuf_operations
) {
2060 ibuf_merge_or_delete_for_page(
2061 block
->frame
, block
->space
, block
->offset
,
2066 mutex_enter(&(buf_pool
->mutex
));
2067 mutex_enter(&block
->mutex
);
2069 #ifdef UNIV_IBUF_DEBUG
2070 ut_a(ibuf_count_get(block
->space
, block
->offset
) == 0);
2072 /* Because this thread which does the unlocking is not the same that
2073 did the locking, we use a pass value != 0 in unlock, which simply
2074 removes the newest lock debug record, without checking the thread
2079 if (io_type
== BUF_IO_READ
) {
2080 /* NOTE that the call to ibuf may have moved the ownership of
2081 the x-latch to this OS thread: do not let this confuse you in
2084 ut_ad(buf_pool
->n_pend_reads
> 0);
2085 buf_pool
->n_pend_reads
--;
2086 buf_pool
->n_pages_read
++;
2088 rw_lock_x_unlock_gen(&(block
->lock
), BUF_IO_READ
);
2091 if (buf_debug_prints
) {
2092 fputs("Has read ", stderr
);
2094 #endif /* UNIV_DEBUG */
2096 ut_ad(io_type
== BUF_IO_WRITE
);
2098 /* Write means a flush operation: call the completion
2099 routine in the flush system */
2101 buf_flush_write_complete(block
);
2103 rw_lock_s_unlock_gen(&(block
->lock
), BUF_IO_WRITE
);
2105 buf_pool
->n_pages_written
++;
2108 if (buf_debug_prints
) {
2109 fputs("Has written ", stderr
);
2111 #endif /* UNIV_DEBUG */
2114 mutex_exit(&block
->mutex
);
2115 mutex_exit(&(buf_pool
->mutex
));
2118 if (buf_debug_prints
) {
2119 fprintf(stderr
, "page space %lu page no %lu\n",
2120 (ulong
) block
->space
, (ulong
) block
->offset
);
2122 #endif /* UNIV_DEBUG */
2125 /*************************************************************************
2126 Invalidates the file pages in the buffer pool when an archive recovery is
2127 completed. All the file pages buffered must be in a replaceable state when
2128 this function is called: not latched and not modified. */
2131 buf_pool_invalidate(void)
2132 /*=====================*/
2136 ut_ad(buf_all_freed());
2141 freed
= buf_LRU_search_and_free_block(100);
2144 mutex_enter(&(buf_pool
->mutex
));
2146 ut_ad(UT_LIST_GET_LEN(buf_pool
->LRU
) == 0);
2148 mutex_exit(&(buf_pool
->mutex
));
2152 /*************************************************************************
2153 Validates the buffer buf_pool data structure. */
2161 ulint n_single_flush
= 0;
2162 ulint n_lru_flush
= 0;
2163 ulint n_list_flush
= 0;
2171 mutex_enter(&(buf_pool
->mutex
));
2173 for (i
= 0; i
< buf_pool
->curr_size
; i
++) {
2175 block
= buf_pool_get_nth_block(buf_pool
, i
);
2177 mutex_enter(&block
->mutex
);
2179 if (block
->state
== BUF_BLOCK_FILE_PAGE
) {
2181 ut_a(buf_page_hash_get(block
->space
,
2182 block
->offset
) == block
);
2185 #ifdef UNIV_IBUF_DEBUG
2186 ut_a((block
->io_fix
== BUF_IO_READ
)
2187 || ibuf_count_get(block
->space
, block
->offset
)
2190 if (block
->io_fix
== BUF_IO_WRITE
) {
2192 if (block
->flush_type
== BUF_FLUSH_LRU
) {
2194 ut_a(rw_lock_is_locked(
2197 } else if (block
->flush_type
2198 == BUF_FLUSH_LIST
) {
2200 } else if (block
->flush_type
2201 == BUF_FLUSH_SINGLE_PAGE
) {
2207 } else if (block
->io_fix
== BUF_IO_READ
) {
2209 ut_a(rw_lock_is_locked(&(block
->lock
),
2215 if (ut_dulint_cmp(block
->oldest_modification
,
2216 ut_dulint_zero
) > 0) {
2220 } else if (block
->state
== BUF_BLOCK_NOT_USED
) {
2224 mutex_exit(&block
->mutex
);
2227 if (n_lru
+ n_free
> buf_pool
->curr_size
) {
2228 fprintf(stderr
, "n LRU %lu, n free %lu\n",
2229 (ulong
) n_lru
, (ulong
) n_free
);
2233 ut_a(UT_LIST_GET_LEN(buf_pool
->LRU
) == n_lru
);
2234 if (UT_LIST_GET_LEN(buf_pool
->free
) != n_free
) {
2235 fprintf(stderr
, "Free list len %lu, free blocks %lu\n",
2236 (ulong
) UT_LIST_GET_LEN(buf_pool
->free
),
2240 ut_a(UT_LIST_GET_LEN(buf_pool
->flush_list
) == n_flush
);
2242 ut_a(buf_pool
->n_flush
[BUF_FLUSH_SINGLE_PAGE
] == n_single_flush
);
2243 ut_a(buf_pool
->n_flush
[BUF_FLUSH_LIST
] == n_list_flush
);
2244 ut_a(buf_pool
->n_flush
[BUF_FLUSH_LRU
] == n_lru_flush
);
2246 mutex_exit(&(buf_pool
->mutex
));
2248 ut_a(buf_LRU_validate());
2249 ut_a(buf_flush_validate());
2254 /*************************************************************************
2255 Prints info of the buffer buf_pool data structure. */
2269 dict_index_t
* index
;
2273 size
= buf_pool
->curr_size
;
2275 index_ids
= mem_alloc(sizeof(dulint
) * size
);
2276 counts
= mem_alloc(sizeof(ulint
) * size
);
2278 mutex_enter(&(buf_pool
->mutex
));
2281 "buf_pool size %lu\n"
2282 "database pages %lu\n"
2284 "modified database pages %lu\n"
2285 "n pending reads %lu\n"
2286 "n pending flush LRU %lu list %lu single page %lu\n"
2287 "pages read %lu, created %lu, written %lu\n",
2289 (ulong
) UT_LIST_GET_LEN(buf_pool
->LRU
),
2290 (ulong
) UT_LIST_GET_LEN(buf_pool
->free
),
2291 (ulong
) UT_LIST_GET_LEN(buf_pool
->flush_list
),
2292 (ulong
) buf_pool
->n_pend_reads
,
2293 (ulong
) buf_pool
->n_flush
[BUF_FLUSH_LRU
],
2294 (ulong
) buf_pool
->n_flush
[BUF_FLUSH_LIST
],
2295 (ulong
) buf_pool
->n_flush
[BUF_FLUSH_SINGLE_PAGE
],
2296 (ulong
) buf_pool
->n_pages_read
, buf_pool
->n_pages_created
,
2297 (ulong
) buf_pool
->n_pages_written
);
2299 /* Count the number of blocks belonging to each index in the buffer */
2303 for (i
= 0; i
< size
; i
++) {
2304 frame
= buf_pool_get_nth_block(buf_pool
, i
)->frame
;
2306 if (fil_page_get_type(frame
) == FIL_PAGE_INDEX
) {
2308 id
= btr_page_get_index_id(frame
);
2310 /* Look for the id in the index_ids array */
2313 while (j
< n_found
) {
2315 if (ut_dulint_cmp(index_ids
[j
], id
) == 0) {
2331 mutex_exit(&(buf_pool
->mutex
));
2333 for (i
= 0; i
< n_found
; i
++) {
2334 index
= dict_index_get_if_in_cache(index_ids
[i
]);
2337 "Block count for index %lu in buffer is about %lu",
2338 (ulong
) ut_dulint_get_low(index_ids
[i
]),
2343 dict_index_name_print(stderr
, NULL
, index
);
2349 mem_free(index_ids
);
2352 ut_a(buf_validate());
2355 /*************************************************************************
2356 Returns the number of latched pages in the buffer pool. */
2359 buf_get_latched_pages_number(void)
2363 ulint fixed_pages_number
= 0;
2365 mutex_enter(&(buf_pool
->mutex
));
2367 for (i
= 0; i
< buf_pool
->curr_size
; i
++) {
2369 block
= buf_pool_get_nth_block(buf_pool
, i
);
2371 if (block
->magic_n
== BUF_BLOCK_MAGIC_N
) {
2372 mutex_enter(&block
->mutex
);
2374 if (block
->buf_fix_count
!= 0 || block
->io_fix
!= 0) {
2375 fixed_pages_number
++;
2378 mutex_exit(&block
->mutex
);
2382 mutex_exit(&(buf_pool
->mutex
));
2384 return(fixed_pages_number
);
2386 #endif /* UNIV_DEBUG */
2388 /*************************************************************************
2389 Returns the number of pending buf pool ios. */
2392 buf_get_n_pending_ios(void)
2393 /*=======================*/
2395 return(buf_pool
->n_pend_reads
2396 + buf_pool
->n_flush
[BUF_FLUSH_LRU
]
2397 + buf_pool
->n_flush
[BUF_FLUSH_LIST
]
2398 + buf_pool
->n_flush
[BUF_FLUSH_SINGLE_PAGE
]);
2401 /*************************************************************************
2402 Returns the ratio in percents of modified pages in the buffer pool /
2403 database pages in the buffer pool. */
2406 buf_get_modified_ratio_pct(void)
2407 /*============================*/
2411 mutex_enter(&(buf_pool
->mutex
));
2413 ratio
= (100 * UT_LIST_GET_LEN(buf_pool
->flush_list
))
2414 / (1 + UT_LIST_GET_LEN(buf_pool
->LRU
)
2415 + UT_LIST_GET_LEN(buf_pool
->free
));
2417 /* 1 + is there to avoid division by zero */
2419 mutex_exit(&(buf_pool
->mutex
));
2424 /*************************************************************************
2425 Prints info of the buffer i/o. */
2430 FILE* file
) /* in/out: buffer where to print */
2432 time_t current_time
;
2433 double time_elapsed
;
2437 size
= buf_pool
->curr_size
;
2439 mutex_enter(&(buf_pool
->mutex
));
2443 "AWE: Buffer pool memory frames %lu\n",
2444 (ulong
) buf_pool
->n_frames
);
2447 "AWE: Database pages and free buffers"
2448 " mapped in frames %lu\n",
2450 UT_LIST_GET_LEN(buf_pool
->awe_LRU_free_mapped
));
2453 "Buffer pool size %lu\n"
2454 "Free buffers %lu\n"
2455 "Database pages %lu\n"
2456 "Modified db pages %lu\n"
2457 "Pending reads %lu\n"
2458 "Pending writes: LRU %lu, flush list %lu, single page %lu\n",
2460 (ulong
) UT_LIST_GET_LEN(buf_pool
->free
),
2461 (ulong
) UT_LIST_GET_LEN(buf_pool
->LRU
),
2462 (ulong
) UT_LIST_GET_LEN(buf_pool
->flush_list
),
2463 (ulong
) buf_pool
->n_pend_reads
,
2464 (ulong
) buf_pool
->n_flush
[BUF_FLUSH_LRU
]
2465 + buf_pool
->init_flush
[BUF_FLUSH_LRU
],
2466 (ulong
) buf_pool
->n_flush
[BUF_FLUSH_LIST
]
2467 + buf_pool
->init_flush
[BUF_FLUSH_LIST
],
2468 (ulong
) buf_pool
->n_flush
[BUF_FLUSH_SINGLE_PAGE
]);
2470 current_time
= time(NULL
);
2471 time_elapsed
= 0.001 + difftime(current_time
,
2472 buf_pool
->last_printout_time
);
2473 buf_pool
->last_printout_time
= current_time
;
2476 "Pages read %lu, created %lu, written %lu\n"
2477 "%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
2478 (ulong
) buf_pool
->n_pages_read
,
2479 (ulong
) buf_pool
->n_pages_created
,
2480 (ulong
) buf_pool
->n_pages_written
,
2481 (buf_pool
->n_pages_read
- buf_pool
->n_pages_read_old
)
2483 (buf_pool
->n_pages_created
- buf_pool
->n_pages_created_old
)
2485 (buf_pool
->n_pages_written
- buf_pool
->n_pages_written_old
)
2489 fprintf(file
, "AWE: %.2f page remaps/s\n",
2490 (buf_pool
->n_pages_awe_remapped
2491 - buf_pool
->n_pages_awe_remapped_old
)
2495 if (buf_pool
->n_page_gets
> buf_pool
->n_page_gets_old
) {
2496 fprintf(file
, "Buffer pool hit rate %lu / 1000\n",
2498 (1000 - ((1000 * (buf_pool
->n_pages_read
2499 - buf_pool
->n_pages_read_old
))
2500 / (buf_pool
->n_page_gets
2501 - buf_pool
->n_page_gets_old
))));
2503 fputs("No buffer pool page gets since the last printout\n",
2507 buf_pool
->n_page_gets_old
= buf_pool
->n_page_gets
;
2508 buf_pool
->n_pages_read_old
= buf_pool
->n_pages_read
;
2509 buf_pool
->n_pages_created_old
= buf_pool
->n_pages_created
;
2510 buf_pool
->n_pages_written_old
= buf_pool
->n_pages_written
;
2511 buf_pool
->n_pages_awe_remapped_old
= buf_pool
->n_pages_awe_remapped
;
2513 mutex_exit(&(buf_pool
->mutex
));
2516 /**************************************************************************
2517 Refreshes the statistics used to print per-second averages. */
2520 buf_refresh_io_stats(void)
2521 /*======================*/
2523 buf_pool
->last_printout_time
= time(NULL
);
2524 buf_pool
->n_page_gets_old
= buf_pool
->n_page_gets
;
2525 buf_pool
->n_pages_read_old
= buf_pool
->n_pages_read
;
2526 buf_pool
->n_pages_created_old
= buf_pool
->n_pages_created
;
2527 buf_pool
->n_pages_written_old
= buf_pool
->n_pages_written
;
2528 buf_pool
->n_pages_awe_remapped_old
= buf_pool
->n_pages_awe_remapped
;
2531 /*************************************************************************
2532 Checks that all file pages in the buffer are in a replaceable state. */
2543 mutex_enter(&(buf_pool
->mutex
));
2545 for (i
= 0; i
< buf_pool
->curr_size
; i
++) {
2547 block
= buf_pool_get_nth_block(buf_pool
, i
);
2549 mutex_enter(&block
->mutex
);
2551 if (block
->state
== BUF_BLOCK_FILE_PAGE
) {
2553 if (!buf_flush_ready_for_replace(block
)) {
2556 "Page %lu %lu still fixed or dirty\n",
2557 (ulong
) block
->space
,
2558 (ulong
) block
->offset
);
2563 mutex_exit(&block
->mutex
);
2566 mutex_exit(&(buf_pool
->mutex
));
2571 /*************************************************************************
2572 Checks that there currently are no pending i/o-operations for the buffer
2576 buf_pool_check_no_pending_io(void)
2577 /*==============================*/
2578 /* out: TRUE if there is no pending i/o */
2582 mutex_enter(&(buf_pool
->mutex
));
2584 if (buf_pool
->n_pend_reads
+ buf_pool
->n_flush
[BUF_FLUSH_LRU
]
2585 + buf_pool
->n_flush
[BUF_FLUSH_LIST
]
2586 + buf_pool
->n_flush
[BUF_FLUSH_SINGLE_PAGE
]) {
2592 mutex_exit(&(buf_pool
->mutex
));
2597 /*************************************************************************
2598 Gets the current length of the free list of buffer blocks. */
2601 buf_get_free_list_len(void)
2602 /*=======================*/
2606 mutex_enter(&(buf_pool
->mutex
));
2608 len
= UT_LIST_GET_LEN(buf_pool
->free
);
2610 mutex_exit(&(buf_pool
->mutex
));