MySQL 5.0.11 sources for tomato
[tomato.git] / release / src / router / mysql / storage / innodb_plugin / srv / srv0srv.c
/*****************************************************************************

Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.

Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.

Portions of this file contain modifications contributed and copyrighted
by Percona Inc. Those modifications are
gratefully acknowledged and are described briefly in the InnoDB
documentation. The contributions by Percona Inc. are incorporated with
their permission, and subject to the conditions contained in the file
COPYING.Percona.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/
/**************************************************//**
@file srv/srv0srv.c
The database server main program

NOTE: SQL Server 7 uses something which the documentation
calls user mode scheduled threads (UMS threads). One such
thread is usually allocated per processor. Win32
documentation does not know any UMS threads, which suggests
that the concept is internal to SQL Server 7. It may mean that
SQL Server 7 does all the scheduling of threads itself, even
in i/o waits. We should maybe modify InnoDB to use the same
technique, because thread switches within NT may be too slow.

SQL Server 7 also mentions fibers, which are cooperatively
scheduled threads. They can boost performance by 5 %,
according to Delaney and Soukup's book.

Windows 2000 will have something called thread pooling
(see msdn website), which we could possibly use.

Another possibility could be to use some very fast user space
thread library. This might confuse NT though.

Created 10/8/1995 Heikki Tuuri
*******************************************************/
/* Dummy comment */
#include "srv0srv.h"

#include "ut0mem.h"
#include "ut0ut.h"
#include "os0proc.h"
#include "mem0mem.h"
#include "mem0pool.h"
#include "sync0sync.h"
#include "thr0loc.h"
#include "que0que.h"
#include "srv0que.h"
#include "log0recv.h"
#include "pars0pars.h"
#include "usr0sess.h"
#include "lock0lock.h"
#include "trx0purge.h"
#include "ibuf0ibuf.h"
#include "buf0flu.h"
#include "buf0lru.h"
#include "btr0sea.h"
#include "dict0load.h"
#include "dict0boot.h"
#include "srv0start.h"
#include "row0mysql.h"
#include "ha_prototypes.h"
#include "trx0i_s.h"
#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
#include "read0read.h"

#ifdef __WIN__
/* error LNK2001: unresolved external symbol _debug_sync_C_callback_ptr */
# define DEBUG_SYNC_C(dummy) ((void) 0)
#else
# include "m_string.h" /* for my_sys.h */
# include "my_sys.h" /* DEBUG_SYNC_C */
#endif
/* This is set to TRUE if the MySQL user has set it in MySQL; currently
affects only FOREIGN KEY definition parsing */
UNIV_INTERN ibool srv_lower_case_table_names = FALSE;

/* The following counter is incremented whenever there is some user activity
in the server */
UNIV_INTERN ulint srv_activity_count = 0;

/* The following is the maximum allowed duration of a lock wait. */
UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;

/* How long data manipulation language (DML) statements need to be delayed,
in microseconds, in order to reduce the lagging of the purge thread. */
UNIV_INTERN ulint srv_dml_needed_delay = 0;

UNIV_INTERN ibool srv_lock_timeout_active = FALSE;
UNIV_INTERN ibool srv_monitor_active = FALSE;
UNIV_INTERN ibool srv_error_monitor_active = FALSE;

UNIV_INTERN const char* srv_main_thread_op_info = "";

/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
UNIV_INTERN const char srv_mysql50_table_name_prefix[9] = "#mysql50#";
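
/* Example: a table created as t1 under the pre-5.1 filename encoding
shows up as "#mysql50#t1" until its name has been converted to the new
encoding. */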

/* Server parameters which are read from the initfile */

/* The following three are dir paths which are concatenated before file
names, where the file name itself may also contain a path */

UNIV_INTERN char* srv_data_home = NULL;
#ifdef UNIV_LOG_ARCHIVE
UNIV_INTERN char* srv_arch_dir = NULL;
#endif /* UNIV_LOG_ARCHIVE */

/** store each table created by a user in its own file; data
dictionary tables are in the system tablespace 0 */
UNIV_INTERN my_bool srv_file_per_table;
/** The file format to use on new *.ibd files. */
UNIV_INTERN ulint srv_file_format = 0;
/** Whether to check file format during startup. A value of
DICT_TF_FORMAT_MAX + 1 means no checking, i.e. FALSE. The default is to
set it to the highest format we support. */
UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX;

#if DICT_TF_FORMAT_51
# error "DICT_TF_FORMAT_51 must be 0!"
#endif

/** Place locks on records only, i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;

UNIV_INTERN ulint srv_n_data_files = 0;
UNIV_INTERN char** srv_data_file_names = NULL;
/* size in database pages */
UNIV_INTERN ulint* srv_data_file_sizes = NULL;

/* if TRUE, then we auto-extend the last data file */
UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
/* if != 0, this tells the max size auto-extending may increase the
last data file size */
UNIV_INTERN ulint srv_last_file_size_max = 0;
/* If the last data file is auto-extended, we add this
many pages to it at a time */
UNIV_INTERN ulong srv_auto_extend_increment = 8;
UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL;

/* If the following is TRUE we do not allow inserts etc. This protects
the user from forgetting the 'newraw' keyword in my.cnf */

UNIV_INTERN ibool srv_created_new_raw = FALSE;

UNIV_INTERN char** srv_log_group_home_dirs = NULL;

UNIV_INTERN ulint srv_n_log_groups = ULINT_MAX;
UNIV_INTERN ulint srv_n_log_files = ULINT_MAX;
/* size in database pages */
UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
/* size in database pages */
UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;

/* Try to flush dirty pages so as to avoid IO bursts at
the checkpoints. */
UNIV_INTERN char srv_adaptive_flushing = TRUE;

/** Maximum number of times allowed to conditionally acquire
mutex before switching to blocking wait on the mutex */
#define MAX_MUTEX_NOWAIT 20

/** Check whether the number of failed nonblocking mutex
acquisition attempts exceeds maximum allowed value. If so,
srv_printf_innodb_monitor() will request mutex acquisition
with mutex_enter(), which will wait until it gets the mutex. */
#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
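
/* A sketch of the intended use, assuming a caller that keeps a
mutex_skipped counter (the monitor threads elsewhere in the server
follow this pattern):

	if (srv_printf_innodb_monitor(file, MUTEX_NOWAIT(mutex_skipped),
				      NULL, NULL)) {
		mutex_skipped = 0;
	} else {
		mutex_skipped++;
	}
*/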

/** The sort order table of the MySQL latin1_swedish_ci character set
collation */
UNIV_INTERN const byte* srv_latin1_ordering;

/* use os/external memory allocator */
UNIV_INTERN my_bool srv_use_sys_malloc = TRUE;
/* requested size in kilobytes */
UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX;
/* previously requested size */
UNIV_INTERN ulint srv_buf_pool_old_size;
/* current size in kilobytes */
UNIV_INTERN ulint srv_buf_pool_curr_size = 0;
/* size in bytes */
UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;

/* This parameter is deprecated. Use srv_n_io_[read|write]_threads
instead. */
UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;

/* Switch to enable random read ahead. */
UNIV_INTERN my_bool srv_random_read_ahead = FALSE;
/* User settable value of the number of pages that must be present
in the buffer cache and accessed sequentially for InnoDB to trigger a
readahead request. */
UNIV_INTERN ulong srv_read_ahead_threshold = 56;
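
/* Example: with the default of 56 and 64 pages per extent, a linear
read-ahead of the following extent is triggered once 56 pages of the
current extent have been read into the buffer pool sequentially. */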

#ifdef UNIV_LOG_ARCHIVE
UNIV_INTERN ibool srv_log_archive_on = FALSE;
UNIV_INTERN ibool srv_archive_recovery = 0;
UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn;
#endif /* UNIV_LOG_ARCHIVE */

/* This parameter is used to throttle the number of insert buffers that are
merged in a batch. By increasing this parameter on a faster disk you can
possibly reduce the number of I/O operations performed to complete the
merge operation. The value of this parameter is used as is by the
background loop when the system is idle (low load); on a busy system
the parameter is scaled down by a factor of 4 to avoid putting a
heavier load on the I/O subsystem. */

UNIV_INTERN ulong srv_insert_buffer_batch_size = 20;
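
/* Example with the default of 20: the idle-time background loop merges
up to 20 insert buffer pages per batch, whereas a busy system uses the
value scaled down by 4, i.e. up to 5 pages per batch. */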

UNIV_INTERN char* srv_file_flush_method_str = NULL;
UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;

UNIV_INTERN ulint srv_max_n_open_files = 300;

/* Number of IO operations per second the server can do */
UNIV_INTERN ulong srv_io_capacity = 200;

/* The InnoDB main thread tries to keep the ratio of modified pages
in the buffer pool to all database pages in the buffer pool smaller than
the following number. But it is not guaranteed that the value stays below
that during a time of heavy update/insert activity. */

UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;

/* This variable counts the amount of data read in total (in bytes) */
UNIV_INTERN ulint srv_data_read = 0;

/* Internal setting for "innodb_stats_method". Decides how InnoDB treats
NULL values when collecting statistics. By default, it is set to
SRV_STATS_NULLS_EQUAL(0), i.e. all NULL values are treated as equal */
UNIV_INTERN ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;

/* Here we count the amount of data written in total (in bytes) */
UNIV_INTERN ulint srv_data_written = 0;

/* The number of log write requests done */
UNIV_INTERN ulint srv_log_write_requests = 0;

/* The number of physical writes to the log performed */
UNIV_INTERN ulint srv_log_writes = 0;

/* Amount of data written to the log files in bytes */
UNIV_INTERN ulint srv_os_log_written = 0;

/* Number of writes currently being done to the log files */
UNIV_INTERN ulint srv_os_log_pending_writes = 0;

/* We increase this counter when we don't have enough space in the
log buffer and have to flush it */
UNIV_INTERN ulint srv_log_waits = 0;

/* This variable counts the number of times the doublewrite buffer
was flushed */
UNIV_INTERN ulint srv_dblwr_writes = 0;

/* Here we store the number of pages that have been flushed to the
doublewrite buffer */
UNIV_INTERN ulint srv_dblwr_pages_written = 0;

/* In this variable we store the number of write requests issued */
UNIV_INTERN ulint srv_buf_pool_write_requests = 0;

/* Here we store the number of times we had to wait for a free page
in the buffer pool. It happens when the buffer pool is full and we need
to make a flush in order to be able to read or create a page. */
UNIV_INTERN ulint srv_buf_pool_wait_free = 0;

/* Variable to count the number of pages that were written from the
buffer pool to disk */
UNIV_INTERN ulint srv_buf_pool_flushed = 0;

/** Number of buffer pool reads that led to the
reading of a disk page */
UNIV_INTERN ulint srv_buf_pool_reads = 0;

/* Structure to pass status variables to MySQL */
UNIV_INTERN export_struc export_vars;

/* If the following is != 0 we do not allow inserts etc. This protects
the user from forgetting the innodb_force_recovery keyword in my.cnf */

UNIV_INTERN ulint srv_force_recovery = 0;
/*-----------------------*/
/* We are prepared for a situation where we have this many threads waiting for
a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
value. */

UNIV_INTERN ulint srv_max_n_threads = 0;

/* The following controls how many threads we let inside InnoDB concurrently:
threads waiting for locks are not counted into the number because otherwise
we could get a deadlock. MySQL creates a thread for each user session, and
semaphore contention and convoy problems can occur without this restriction.
Value 10 should be good if there are fewer than 4 processors + 4 disks in the
computer. Bigger computers need bigger values. Value 0 will disable the
concurrency check. */

UNIV_INTERN ulong srv_thread_concurrency = 0;

/* This mutex protects srv_conc data structures */
UNIV_INTERN os_fast_mutex_t srv_conc_mutex;
/* Number of transactions that have declared_to_be_inside_innodb set.
It used to be a non-error for this value to drop below zero temporarily.
This is no longer true. We will, however, keep the lint datatype to add
assertions to catch any corner cases that we may have missed. */
UNIV_INTERN lint srv_conc_n_threads = 0;
/* Number of OS threads waiting in the FIFO for permission to enter
InnoDB */
UNIV_INTERN ulint srv_conc_n_waiting_threads = 0;

typedef struct srv_conc_slot_struct srv_conc_slot_t;
struct srv_conc_slot_struct{
	os_event_t event;	/*!< event to wait on */
	ibool reserved;		/*!< TRUE if slot
				reserved */
	ibool wait_ended;	/*!< TRUE when another
				thread has already set
				the event and the
				thread in this slot is
				free to proceed; but
				reserved may still be
				TRUE at that point */
	UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /*!< queue node */
};

/* Queue of threads waiting to get in */
UNIV_INTERN UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue;
/* Array of wait slots */
UNIV_INTERN srv_conc_slot_t* srv_conc_slots;

/* Number of times a thread is allowed to enter InnoDB within the same
SQL query after it has once got the ticket at srv_conc_enter_innodb */
#define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter
#define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay
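
/* Example of the ticket scheme: when a thread is admitted in
srv_conc_enter_innodb(), it is granted SRV_FREE_TICKETS_TO_ENTER
(500 by default) tickets, and each further entry within the same SQL
query consumes one ticket instead of going through the FIFO queue
again. */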
/*-----------------------*/
/* If the following is set to 1 then we do not run purge and insert buffer
merge to completion before shutdown. If it is set to 2, we do not even flush
the buffer pool to data files at shutdown: we effectively 'crash'
InnoDB (but lose no committed transactions). */
UNIV_INTERN ulint srv_fast_shutdown = 0;
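
/* The corresponding server option is innodb_fast_shutdown; for example,
issuing SET GLOBAL innodb_fast_shutdown=2 before shutdown skips both
the purge/insert buffer merge and the final buffer pool flush, so crash
recovery runs at the next startup. */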

/* Generate an innodb_status.<pid> file */
UNIV_INTERN ibool srv_innodb_status = FALSE;

/* When estimating the number of different key values in an index, sample
this many index pages */
UNIV_INTERN unsigned long long srv_stats_sample_pages = 8;

UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
UNIV_INTERN ibool srv_use_checksums = TRUE;

UNIV_INTERN ibool srv_set_thread_priorities = TRUE;
UNIV_INTERN int srv_query_thread_priority = 0;

UNIV_INTERN ulong srv_replication_delay = 0;

/*-------------------------------------------*/
UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
UNIV_INTERN ulong srv_thread_sleep_delay = 10000;
UNIV_INTERN ulong srv_spin_wait_delay = 6;
UNIV_INTERN ibool srv_priority_boost = TRUE;

#ifdef UNIV_DEBUG
UNIV_INTERN ibool srv_print_thread_releases = FALSE;
UNIV_INTERN ibool srv_print_lock_waits = FALSE;
UNIV_INTERN ibool srv_print_buf_io = FALSE;
UNIV_INTERN ibool srv_print_log_io = FALSE;
UNIV_INTERN ibool srv_print_latch_waits = FALSE;
#endif /* UNIV_DEBUG */

UNIV_INTERN ulint srv_n_rows_inserted = 0;
UNIV_INTERN ulint srv_n_rows_updated = 0;
UNIV_INTERN ulint srv_n_rows_deleted = 0;
UNIV_INTERN ulint srv_n_rows_read = 0;

static ulint srv_n_rows_inserted_old = 0;
static ulint srv_n_rows_updated_old = 0;
static ulint srv_n_rows_deleted_old = 0;
static ulint srv_n_rows_read_old = 0;

UNIV_INTERN ulint srv_n_lock_wait_count = 0;
UNIV_INTERN ulint srv_n_lock_wait_current_count = 0;
UNIV_INTERN ib_int64_t srv_n_lock_wait_time = 0;
UNIV_INTERN ulint srv_n_lock_max_wait_time = 0;

/* Set the following to 0 if you want InnoDB to write messages on
stderr on startup/shutdown. */
UNIV_INTERN ibool srv_print_verbose_log = TRUE;
UNIV_INTERN ibool srv_print_innodb_monitor = FALSE;
UNIV_INTERN ibool srv_print_innodb_lock_monitor = FALSE;
UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE;
UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE;

/* Array of English strings describing the current state of an
i/o handler thread */

UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];

UNIV_INTERN time_t srv_last_monitor_time;

UNIV_INTERN mutex_t srv_innodb_monitor_mutex;

/* Mutex for locking srv_monitor_file */
UNIV_INTERN mutex_t srv_monitor_file_mutex;
/* Temporary file for innodb monitor output */
UNIV_INTERN FILE* srv_monitor_file;
/* Mutex for locking srv_dict_tmpfile.
This mutex has a very high rank; threads reserving it should not
be holding any InnoDB latches. */
UNIV_INTERN mutex_t srv_dict_tmpfile_mutex;
/* Temporary file for output from the data dictionary */
UNIV_INTERN FILE* srv_dict_tmpfile;
/* Mutex for locking srv_misc_tmpfile.
This mutex has a very low rank; threads reserving it should not
acquire any further latches or sleep before releasing this one. */
UNIV_INTERN mutex_t srv_misc_tmpfile_mutex;
/* Temporary file for miscellaneous diagnostic output */
UNIV_INTERN FILE* srv_misc_tmpfile;

UNIV_INTERN ulint srv_main_thread_process_no = 0;
UNIV_INTERN ulint srv_main_thread_id = 0;

/* The following counters record work done by srv_master_thread. */

/* Iterations by the 'once per second' loop. */
static ulint srv_main_1_second_loops = 0;
/* Calls to sleep by the 'once per second' loop. */
static ulint srv_main_sleeps = 0;
/* Iterations by the 'once per 10 seconds' loop. */
static ulint srv_main_10_second_loops = 0;
/* Iterations of the loop bounded by the 'background_loop' label. */
static ulint srv_main_background_loops = 0;
/* Iterations of the loop bounded by the 'flush_loop' label. */
static ulint srv_main_flush_loops = 0;
/* Log writes involving flush. */
static ulint srv_log_writes_and_flush = 0;

/* This is only ever touched by the master thread. It records the
time when the last flush of the log files happened. The master
thread ensures that we flush the log files at least once per
second. */
static time_t srv_last_log_flush_time;

/* The master thread performs various tasks based on the current
state of IO activity and the level of IO utilization in past
intervals. The following macros define thresholds for these conditions. */
#define SRV_PEND_IO_THRESHOLD (PCT_IO(3))
#define SRV_RECENT_IO_ACTIVITY (PCT_IO(5))
#define SRV_PAST_IO_ACTIVITY (PCT_IO(200))
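
/* Assuming PCT_IO(p) evaluates to p per cent of srv_io_capacity (see
srv0srv.h) and the default srv_io_capacity of 200, these thresholds
come to roughly 6, 10 and 400 i/o operations respectively. */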

/*
	IMPLEMENTATION OF THE SERVER MAIN PROGRAM
	=========================================

There is the following analogy between this database
server and an operating system kernel:

DB concept			equivalent OS concept
----------			---------------------
transaction		--	process;

query thread		--	thread;

lock			--	semaphore;

transaction set to
the rollback state	--	kill signal delivered to a process;

kernel			--	kernel;

query thread execution:
(a) without kernel mutex
reserved		--	process executing in user mode;
(b) with kernel mutex reserved
			--	process executing in kernel mode;

The server is controlled by a master thread which runs at
a priority higher than normal, that is, higher than user threads.
It sleeps most of the time, and wakes up, say, every 300 milliseconds,
to check whether there is anything happening in the server which
requires intervention of the master thread. Such situations may be,
for example, when flushing of dirty blocks is needed in the buffer
pool or old versions of database rows have to be cleaned away.

The threads which we call user threads serve the queries of
the clients and input from the console of the server.
They run at normal priority. The server may have several
communications endpoints. A dedicated set of user threads waits
at each of these endpoints ready to receive a client request.
Each request is taken by a single user thread, which then starts
processing and, when the result is ready, sends it to the client
and returns to wait at the same endpoint the thread started from.

So, we do not have dedicated communication threads listening at
the endpoints and dealing out the jobs to dedicated worker threads.
Our architecture saves one thread switch per request compared
to the solution with dedicated communication threads,
which amounts to 15 microseconds on a 100 MHz Pentium
running NT. If the client
is communicating over a network, this saving is negligible, but
if the client resides in the same machine, maybe in an SMP machine
on a different processor from the server thread, the saving
can be important as the threads can communicate over shared
memory with an overhead of a few microseconds.

We may later implement a dedicated communication thread solution
for those endpoints which communicate over a network.

Our solution with user threads has two problems: for each endpoint
there has to be a number of listening threads. If there are many
communication endpoints, it may be difficult to set the right number
of concurrent threads in the system, as many of the threads
may always be waiting at less busy endpoints. Another problem
is queuing of the messages, as the server internally does not
offer any queue for jobs.

Another group of user threads is intended for splitting the
queries and processing them in parallel. Let us call these
parallel communication threads. These threads are waiting for
parallelized tasks, suspended on event semaphores.

A single user thread waits for input from the console,
like a command to shut down the database.

Utility threads are a different group of threads which take
care of the buffer pool flushing and other, mainly background,
operations in the server.
Some of these utility threads always run at a lower than normal
priority, so that they are always in the background. Some of them
may dynamically boost their priority by the pri_adjust function,
even to higher than normal priority, if their task becomes urgent.
The running of utilities is controlled by high- and low-water marks
of urgency. The urgency may be measured by the number of dirty blocks
in the buffer pool, in the case of the flush thread, for example.
When the high-water mark is exceeded, a utility starts running, until
the urgency drops under the low-water mark. Then the utility thread
suspends itself to wait for an event. The master thread is
responsible for signaling this event when the utility thread is
again needed.

For each individual type of utility, some threads always remain
at lower than normal priority. This is because pri_adjust is implemented
so that the threads at normal or higher priority control their
share of running time by calling sleep. Thus, if the load of the
system suddenly drops, these threads cannot necessarily utilize
the system fully. The background priority threads make up for this,
starting to run when the load drops.

When there is no activity in the system, the master thread also
suspends itself to wait for an event, making
the server totally silent. The responsibility to signal this
event is on the user thread which again receives a message
from a client.

There is still one complication in our server design. If a
background utility thread obtains a resource (e.g., mutex) needed by a user
thread, and there is also some other user activity in the system,
the user thread may have to wait indefinitely long for the
resource, as the OS does not schedule a background thread if
there is some other runnable user thread. This problem is called
priority inversion in real-time programming.

One solution to the priority inversion problem would be to
keep a record of which thread owns which resource and
in the above case boost the priority of the background thread
so that it will be scheduled and it can release the resource.
This solution is called priority inheritance in real-time programming.
A drawback of this solution is that the overhead of acquiring a mutex
increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
the thread has to call os_thread_get_curr_id.
This may be compared to the 0.5 microsecond overhead for a mutex lock-unlock
pair. Note that the thread
cannot store the information in the resource, say mutex, itself,
because competing threads could wipe out the information if it is
stored before acquiring the mutex, and if it is stored afterwards,
the information is outdated for the time of one machine instruction,
at least. (To be precise, the information could be stored to
lock_word in mutex if the machine supports atomic swap.)

The above solution with priority inheritance may become relevant in the
future, but at the moment we plan to implement a more coarse solution,
which could be called a global priority inheritance. If a thread
has to wait for a long time, say 300 milliseconds, for a resource,
we just guess that it may be waiting for a resource owned by a background
thread, and boost the priority of all runnable background threads
to the normal level. The background threads then themselves adjust
their fixed priority back to background after releasing all resources
they had (or, at some fixed points in their program code).

What is the performance of the global priority inheritance solution?
We may weigh the length of the wait time, 300 milliseconds, during
which the system processes some other thread,
against the cost of boosting the priority of each runnable background
thread, rescheduling it, and lowering the priority again.
On a 100 MHz Pentium + NT this overhead may be of the order of 100
microseconds per thread. So, if the number of runnable background
threads is not very big, say < 100, the cost is tolerable.
Utility threads will probably not access resources used by
user threads very often, so collisions of user threads
with preempted utility threads should not happen very often.

The thread table contains
information on the current status of each thread existing in the system,
and also the event semaphores used in suspending the master thread
and utility and parallel communication threads when they have nothing to do.
The thread table can be seen as an analogue to the process table
in a traditional Unix implementation.

The thread table is also used in the global priority inheritance
scheme. This brings in one additional complication: threads accessing
the thread table must have at least normal fixed priority,
because the priority inheritance solution does not work if a background
thread is preempted while possessing the mutex protecting the thread table.
So, if a thread accesses the thread table, its priority has to be
boosted at least to normal. This priority requirement can be seen as similar
to the privileged mode used when processing the kernel calls in traditional
Unix. */

/* Thread slot in the thread table */
struct srv_slot_struct{
	os_thread_id_t id;	/*!< thread id */
	os_thread_t handle;	/*!< thread handle */
	unsigned type:3;	/*!< thread type: user, utility etc. */
	unsigned in_use:1;	/*!< TRUE if this slot is in use */
	unsigned suspended:1;	/*!< TRUE if the thread is waiting
				for the event of this slot */
	ib_time_t suspend_time;	/*!< time when the thread was
				suspended */
	os_event_t event;	/*!< event used in suspending the
				thread when it has nothing to do */
	que_thr_t* thr;		/*!< suspended query thread (only
				used for MySQL threads) */
};

/* Table for MySQL threads where they will be suspended to wait for locks */
UNIV_INTERN srv_slot_t* srv_mysql_table = NULL;

UNIV_INTERN os_event_t srv_lock_timeout_thread_event;

UNIV_INTERN srv_sys_t* srv_sys = NULL;

/* Padding to prevent other memory update hotspots from residing on
the same memory cache line */
UNIV_INTERN byte srv_pad1[64];
/* Mutex protecting the server, trx structs, query threads, and lock table */
UNIV_INTERN mutex_t* kernel_mutex_temp;
/* Padding to prevent other memory update hotspots from residing on
the same memory cache line */
UNIV_INTERN byte srv_pad2[64];

#if 0
/* The following three values measure the urgency of the jobs of
buffer, version, and insert threads. They may vary from 0 - 1000.
The server mutex protects all these variables. The low-water values
tell that the server can acquiesce the utility when the value
drops below this low-water mark. */

static ulint srv_meter[SRV_MASTER + 1];
static ulint srv_meter_low_water[SRV_MASTER + 1];
static ulint srv_meter_high_water[SRV_MASTER + 1];
static ulint srv_meter_high_water2[SRV_MASTER + 1];
static ulint srv_meter_foreground[SRV_MASTER + 1];
#endif

/* The following values give info about the activity going on in
the database. They are protected by the server mutex. The arrays
are indexed by the type of the thread. */

UNIV_INTERN ulint srv_n_threads_active[SRV_MASTER + 1];
UNIV_INTERN ulint srv_n_threads[SRV_MASTER + 1];

/***********************************************************************
Prints counters for work done by srv_master_thread. */
static
void
srv_print_master_thread_info(
/*=========================*/
	FILE *file)	/* in: output stream */
{
	fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
		"%lu 10_second, %lu background, %lu flush\n",
		srv_main_1_second_loops, srv_main_sleeps,
		srv_main_10_second_loops, srv_main_background_loops,
		srv_main_flush_loops);
	fprintf(file, "srv_master_thread log flush and writes: %lu\n",
		srv_log_writes_and_flush);
}

/*********************************************************************//**
Sets the info describing an i/o thread's current state. */
UNIV_INTERN
void
srv_set_io_thread_op_info(
/*======================*/
	ulint i,	/*!< in: the 'segment' of the i/o thread */
	const char* str) /*!< in: constant char string describing the
			state */
{
	ut_a(i < SRV_MAX_N_IO_THREADS);

	srv_io_thread_op_info[i] = str;
}

/*********************************************************************//**
Accessor function to get pointer to n'th slot in the server thread
table.
@return pointer to the slot */
static
srv_slot_t*
srv_table_get_nth_slot(
/*===================*/
	ulint index)	/*!< in: index of the slot */
{
	ut_a(index < OS_THREAD_MAX_N);

	return(srv_sys->threads + index);
}

/*********************************************************************//**
Gets the number of threads in the system.
@return sum of srv_n_threads[] */
UNIV_INTERN
ulint
srv_get_n_threads(void)
/*===================*/
{
	ulint i;
	ulint n_threads = 0;

	mutex_enter(&kernel_mutex);

	for (i = SRV_COM; i < SRV_MASTER + 1; i++) {

		n_threads += srv_n_threads[i];
	}

	mutex_exit(&kernel_mutex);

	return(n_threads);
}

/*********************************************************************//**
Reserves a slot in the thread table for the current thread. Also creates the
thread local storage struct for the current thread. NOTE! The server mutex
has to be reserved by the caller!
@return reserved slot index */
static
ulint
srv_table_reserve_slot(
/*===================*/
	enum srv_thread_type type)	/*!< in: type of the thread */
{
	srv_slot_t* slot;
	ulint i;

	ut_a(type > 0);
	ut_a(type <= SRV_MASTER);

	i = 0;
	slot = srv_table_get_nth_slot(i);

	while (slot->in_use) {
		i++;
		slot = srv_table_get_nth_slot(i);
	}

	ut_a(slot->in_use == FALSE);

	slot->in_use = TRUE;
	slot->suspended = FALSE;
	slot->type = type;
	slot->id = os_thread_get_curr_id();
	slot->handle = os_thread_get_curr();

	thr_local_create();

	thr_local_set_slot_no(os_thread_get_curr_id(), i);

	return(i);
}

/*********************************************************************//**
Suspends the calling thread to wait for the event in its thread slot.
NOTE! The server mutex has to be reserved by the caller!
@return event for the calling thread to wait */
static
os_event_t
srv_suspend_thread(void)
/*====================*/
{
	srv_slot_t* slot;
	os_event_t event;
	ulint slot_no;
	enum srv_thread_type type;

	ut_ad(mutex_own(&kernel_mutex));

	slot_no = thr_local_get_slot_no(os_thread_get_curr_id());

	if (srv_print_thread_releases) {
		fprintf(stderr,
			"Suspending thread %lu to slot %lu\n",
			(ulong) os_thread_get_curr_id(), (ulong) slot_no);
	}

	slot = srv_table_get_nth_slot(slot_no);

	type = slot->type;

	ut_ad(type >= SRV_WORKER);
	ut_ad(type <= SRV_MASTER);

	event = slot->event;

	slot->suspended = TRUE;

	ut_ad(srv_n_threads_active[type] > 0);

	srv_n_threads_active[type]--;

	os_event_reset(event);

	return(event);
}

/*********************************************************************//**
Releases threads of the type given from suspension in the thread table.
NOTE! The server mutex has to be reserved by the caller!
@return number of threads released: this may be less than n if not
enough threads were suspended at the moment */
UNIV_INTERN
ulint
srv_release_threads(
/*================*/
	enum srv_thread_type type,	/*!< in: thread type */
	ulint n)			/*!< in: number of threads to release */
{
	srv_slot_t* slot;
	ulint i;
	ulint count = 0;

	ut_ad(type >= SRV_WORKER);
	ut_ad(type <= SRV_MASTER);
	ut_ad(n > 0);
	ut_ad(mutex_own(&kernel_mutex));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {

		slot = srv_table_get_nth_slot(i);

		if (slot->in_use && slot->type == type && slot->suspended) {

			slot->suspended = FALSE;

			srv_n_threads_active[type]++;

			os_event_set(slot->event);

			if (srv_print_thread_releases) {
				fprintf(stderr,
					"Releasing thread %lu type %lu"
					" from slot %lu\n",
					(ulong) slot->id, (ulong) type,
					(ulong) i);
			}

			count++;

			if (count == n) {
				break;
			}
		}
	}

	return(count);
}

/*********************************************************************//**
Returns the calling thread type.
@return SRV_COM, ... */
UNIV_INTERN
enum srv_thread_type
srv_get_thread_type(void)
/*=====================*/
{
	ulint slot_no;
	srv_slot_t* slot;
	enum srv_thread_type type;

	mutex_enter(&kernel_mutex);

	slot_no = thr_local_get_slot_no(os_thread_get_curr_id());

	slot = srv_table_get_nth_slot(slot_no);

	type = slot->type;

	ut_ad(type >= SRV_WORKER);
	ut_ad(type <= SRV_MASTER);

	mutex_exit(&kernel_mutex);

	return(type);
}

/*********************************************************************//**
Initializes the server. */
UNIV_INTERN
void
srv_init(void)
/*==========*/
{
	srv_conc_slot_t* conc_slot;
	srv_slot_t* slot;
	ulint i;

	srv_sys = mem_alloc(sizeof(srv_sys_t));

	kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
	mutex_create(&kernel_mutex, SYNC_KERNEL);

	mutex_create(&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);

	srv_sys->threads = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		slot = srv_table_get_nth_slot(i);
		slot->in_use = FALSE;
		slot->type = 0;	/* Avoid purify errors */
		slot->event = os_event_create(NULL);
		ut_a(slot->event);
	}

	srv_mysql_table = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		slot = srv_mysql_table + i;
		slot->in_use = FALSE;
		slot->type = 0;
		slot->event = os_event_create(NULL);
		ut_a(slot->event);
	}

	srv_lock_timeout_thread_event = os_event_create(NULL);

	for (i = 0; i < SRV_MASTER + 1; i++) {
		srv_n_threads_active[i] = 0;
		srv_n_threads[i] = 0;
#if 0
		srv_meter[i] = 30;
		srv_meter_low_water[i] = 50;
		srv_meter_high_water[i] = 100;
		srv_meter_high_water2[i] = 200;
		srv_meter_foreground[i] = 250;
#endif
	}

	UT_LIST_INIT(srv_sys->tasks);

	/* Create dummy indexes for infimum and supremum records */

	dict_ind_init();

	/* Init the server concurrency restriction data structures */

	os_fast_mutex_init(&srv_conc_mutex);

	UT_LIST_INIT(srv_conc_queue);

	srv_conc_slots = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		conc_slot = srv_conc_slots + i;
		conc_slot->reserved = FALSE;
		conc_slot->event = os_event_create(NULL);
		ut_a(conc_slot->event);
	}

	/* Initialize some INFORMATION SCHEMA internal structures */
	trx_i_s_cache_init(trx_i_s_cache);
}

/*********************************************************************//**
Frees the data structures created in srv_init(). */
UNIV_INTERN
void
srv_free(void)
/*==========*/
{
	os_fast_mutex_free(&srv_conc_mutex);
	mem_free(srv_conc_slots);
	srv_conc_slots = NULL;

	mem_free(srv_sys->threads);
	mem_free(srv_sys);
	srv_sys = NULL;

	mem_free(kernel_mutex_temp);
	kernel_mutex_temp = NULL;
	mem_free(srv_mysql_table);
	srv_mysql_table = NULL;

	trx_i_s_cache_free(trx_i_s_cache);
}

/*********************************************************************//**
Initializes the synchronization primitives, memory system, and the thread
local storage. */
UNIV_INTERN
void
srv_general_init(void)
/*==================*/
{
	ut_mem_init();
	/* Reset the system variables in the recovery module. */
	recv_sys_var_init();
	os_sync_init();
	sync_init();
	mem_init(srv_mem_pool_size);
	thr_local_init();
}

/*======================= InnoDB Server FIFO queue =======================*/

/* Maximum allowable purge history length. <=0 means 'infinite'. */
UNIV_INTERN ulong srv_max_purge_lag = 0;

/*********************************************************************//**
Puts an OS thread to wait if there are too many concurrent threads
(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
UNIV_INTERN
void
srv_conc_enter_innodb(
/*==================*/
	trx_t* trx)	/*!< in: transaction object associated with the
			thread */
{
	ibool has_slept = FALSE;
	srv_conc_slot_t* slot = NULL;
	ulint i;

	if (trx->mysql_thd != NULL
	    && thd_is_replication_slave_thread(trx->mysql_thd)) {

		UT_WAIT_FOR(srv_conc_n_threads
			    < (lint)srv_thread_concurrency,
			    srv_replication_delay * 1000);

		return;
	}

	/* If trx has 'free tickets' to enter the engine left, then use one
	such ticket */

	if (trx->n_tickets_to_enter_innodb > 0) {
		trx->n_tickets_to_enter_innodb--;

		return;
	}

	os_fast_mutex_lock(&srv_conc_mutex);
retry:
	if (trx->declared_to_be_inside_innodb) {
		ut_print_timestamp(stderr);
		fputs(" InnoDB: Error: trying to declare trx"
		      " to enter InnoDB, but\n"
		      "InnoDB: it already is declared.\n", stderr);
		trx_print(stderr, trx, 0);
		putc('\n', stderr);
		os_fast_mutex_unlock(&srv_conc_mutex);

		return;
	}

	ut_ad(srv_conc_n_threads >= 0);

	if (srv_conc_n_threads < (lint)srv_thread_concurrency) {

		srv_conc_n_threads++;
		trx->declared_to_be_inside_innodb = TRUE;
		trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;

		os_fast_mutex_unlock(&srv_conc_mutex);

		return;
	}

	/* If the transaction is not holding resources, let it sleep
	for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */

	if (!has_slept && !trx->has_search_latch
	    && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {

		has_slept = TRUE;	/* We let it sleep only once to avoid
					starvation */

		srv_conc_n_waiting_threads++;

		os_fast_mutex_unlock(&srv_conc_mutex);

		trx->op_info = "sleeping before joining InnoDB queue";

		/* Peter Zaitsev suggested that we take the sleep away
		altogether. But the sleep may be good in pathological
		situations of lots of thread switches. Simply put some
		threads aside for a while to reduce the number of thread
		switches. */
		if (SRV_THREAD_SLEEP_DELAY > 0) {
			os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
		}

		trx->op_info = "";

		os_fast_mutex_lock(&srv_conc_mutex);

		srv_conc_n_waiting_threads--;

		goto retry;
	}

	/* Too many threads inside: put the current thread to a queue */

	for (i = 0; i < OS_THREAD_MAX_N; i++) {
		slot = srv_conc_slots + i;

		if (!slot->reserved) {

			break;
		}
	}

	if (i == OS_THREAD_MAX_N) {
		/* Could not find a free wait slot, we must let the
		thread enter */

		srv_conc_n_threads++;
		trx->declared_to_be_inside_innodb = TRUE;
		trx->n_tickets_to_enter_innodb = 0;

		os_fast_mutex_unlock(&srv_conc_mutex);

		return;
	}

	/* Release possible search system latch this thread has */
	if (trx->has_search_latch) {
		trx_search_latch_release_if_reserved(trx);
	}

	/* Add to the queue */
	slot->reserved = TRUE;
	slot->wait_ended = FALSE;

	UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);

	os_event_reset(slot->event);

	srv_conc_n_waiting_threads++;

	os_fast_mutex_unlock(&srv_conc_mutex);

	/* Go to wait for the event; when a thread leaves InnoDB it will
	release this thread */

	trx->op_info = "waiting in InnoDB queue";

	os_event_wait(slot->event);

	trx->op_info = "";

	os_fast_mutex_lock(&srv_conc_mutex);

	srv_conc_n_waiting_threads--;

	/* NOTE that the thread which released this thread already
	incremented the thread counter on behalf of this thread */

	slot->reserved = FALSE;

	UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);

	trx->declared_to_be_inside_innodb = TRUE;
	trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;

	os_fast_mutex_unlock(&srv_conc_mutex);
}
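
/* A sketch of the expected call pattern, as seen from the MySQL
interface layer (illustrative):

	srv_conc_enter_innodb(trx);
	... do work inside InnoDB ...
	srv_conc_exit_innodb(trx);

srv_conc_exit_innodb() keeps the thread nominally inside InnoDB while
it still has free tickets; srv_conc_force_exit_innodb() below declares
it definitely outside. */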

/*********************************************************************//**
This lets a thread enter InnoDB regardless of the number of threads inside
InnoDB. This must be called when a thread ends a lock wait. */
UNIV_INTERN
void
srv_conc_force_enter_innodb(
/*========================*/
	trx_t* trx)	/*!< in: transaction object associated with the
			thread */
{
	if (UNIV_LIKELY(!srv_thread_concurrency)) {

		return;
	}

	ut_ad(srv_conc_n_threads >= 0);

	os_fast_mutex_lock(&srv_conc_mutex);

	srv_conc_n_threads++;
	trx->declared_to_be_inside_innodb = TRUE;
	trx->n_tickets_to_enter_innodb = 1;

	os_fast_mutex_unlock(&srv_conc_mutex);
}

/*********************************************************************//**
This must be called when a thread exits InnoDB in a lock wait or at the
end of an SQL statement. */
UNIV_INTERN
void
srv_conc_force_exit_innodb(
/*=======================*/
	trx_t* trx)	/*!< in: transaction object associated with the
			thread */
{
	srv_conc_slot_t* slot = NULL;

	if (trx->mysql_thd != NULL
	    && thd_is_replication_slave_thread(trx->mysql_thd)) {

		return;
	}

	if (trx->declared_to_be_inside_innodb == FALSE) {

		return;
	}

	os_fast_mutex_lock(&srv_conc_mutex);

	ut_ad(srv_conc_n_threads > 0);
	srv_conc_n_threads--;
	trx->declared_to_be_inside_innodb = FALSE;
	trx->n_tickets_to_enter_innodb = 0;

	if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
		/* Look for a slot where a thread is waiting and no other
		thread has yet released the thread */

		slot = UT_LIST_GET_FIRST(srv_conc_queue);

		while (slot && slot->wait_ended == TRUE) {
			slot = UT_LIST_GET_NEXT(srv_conc_queue, slot);
		}

		if (slot != NULL) {
			slot->wait_ended = TRUE;

			/* We increment the count on behalf of the released
			thread */

			srv_conc_n_threads++;
		}
	}

	os_fast_mutex_unlock(&srv_conc_mutex);

	if (slot != NULL) {
		os_event_set(slot->event);
	}
}

/*********************************************************************//**
This must be called when a thread exits InnoDB. */
UNIV_INTERN
void
srv_conc_exit_innodb(
/*=================*/
	trx_t* trx)	/*!< in: transaction object associated with the
			thread */
{
	if (trx->n_tickets_to_enter_innodb > 0) {
		/* We will pretend the thread is still inside InnoDB though it
		now leaves the InnoDB engine. In this way we save
		a lot of semaphore operations. srv_conc_force_exit_innodb is
		used to declare the thread definitely outside InnoDB. It
		should be called when there is a lock wait or an SQL statement
		ends. */

		return;
	}

	srv_conc_force_exit_innodb(trx);
}

/*========================================================================*/

/*********************************************************************//**
Normalizes init parameter values to use units we use inside InnoDB.
@return DB_SUCCESS or error code */
static
ulint
srv_normalize_init_values(void)
/*===========================*/
{
	ulint n;
	ulint i;

	n = srv_n_data_files;

	for (i = 0; i < n; i++) {
		srv_data_file_sizes[i] = srv_data_file_sizes[i]
			* ((1024 * 1024) / UNIV_PAGE_SIZE);
	}

	srv_last_file_size_max = srv_last_file_size_max
		* ((1024 * 1024) / UNIV_PAGE_SIZE);

	srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;

	srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;

	srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);

	return(DB_SUCCESS);
}
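
/* Worked example, assuming UNIV_PAGE_SIZE is 16384 bytes: a data file
size given as 10 (megabytes) becomes 10 * (1048576 / 16384) = 640
database pages, and a log file size given as 5242880 (bytes) becomes
5242880 / 16384 = 320 pages. */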

/*********************************************************************//**
Boots the InnoDB server.
@return DB_SUCCESS or error code */
UNIV_INTERN
ulint
srv_boot(void)
/*==========*/
{
	ulint err;

	/* Transform the init parameter values given by MySQL to
	use units we use inside InnoDB: */

	err = srv_normalize_init_values();

	if (err != DB_SUCCESS) {
		return(err);
	}

	/* Initialize synchronization primitives, memory management, and thread
	local storage */

	srv_general_init();

	/* Initialize this module */

	srv_init();

	return(DB_SUCCESS);
}

/*********************************************************************//**
Reserves a slot in the thread table for the current MySQL OS thread.
NOTE! The kernel mutex has to be reserved by the caller!
@return reserved slot */
static
srv_slot_t*
srv_table_reserve_slot_for_mysql(void)
/*==================================*/
{
	srv_slot_t* slot;
	ulint i;

	ut_ad(mutex_own(&kernel_mutex));

	i = 0;
	slot = srv_mysql_table + i;

	while (slot->in_use) {
		i++;

		if (i >= OS_THREAD_MAX_N) {

			ut_print_timestamp(stderr);

			fprintf(stderr,
				" InnoDB: There appear to be %lu MySQL"
				" threads currently waiting\n"
				"InnoDB: inside InnoDB, which is the"
				" upper limit. Cannot continue operation.\n"
				"InnoDB: We intentionally generate"
				" a seg fault to print a stack trace\n"
				"InnoDB: on Linux. But first we print"
				" a list of waiting threads.\n", (ulong) i);

			for (i = 0; i < OS_THREAD_MAX_N; i++) {

				slot = srv_mysql_table + i;

				fprintf(stderr,
					"Slot %lu: thread id %lu, type %lu,"
					" in use %lu, susp %lu, time %lu\n",
					(ulong) i,
					(ulong) os_thread_pf(slot->id),
					(ulong) slot->type,
					(ulong) slot->in_use,
					(ulong) slot->suspended,
					(ulong) difftime(ut_time(),
							 slot->suspend_time));
			}

			ut_error;
		}

		slot = srv_mysql_table + i;
	}

	ut_a(slot->in_use == FALSE);

	slot->in_use = TRUE;
	slot->id = os_thread_get_curr_id();
	slot->handle = os_thread_get_curr();

	return(slot);
}

/***************************************************************//**
Puts a MySQL OS thread to wait for a lock to be released. If an error
occurs during the wait, trx->error_state associated with thr is
!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
are possible errors. DB_DEADLOCK is returned if selective deadlock
resolution chose this transaction as a victim. */
UNIV_INTERN
void
srv_suspend_mysql_thread(
/*=====================*/
	que_thr_t* thr)	/*!< in: query thread associated with the MySQL
			OS thread */
{
	srv_slot_t* slot;
	os_event_t event;
	double wait_time;
	trx_t* trx;
	ulint had_dict_lock;
	ibool was_declared_inside_innodb = FALSE;
	ib_int64_t start_time = 0;
	ib_int64_t finish_time;
	ulint diff_time;
	ulint sec;
	ulint ms;
	ulong lock_wait_timeout;

	ut_ad(!mutex_own(&kernel_mutex));

	trx = thr_get_trx(thr);

	if (trx->mysql_thd != 0) {
		DEBUG_SYNC_C("srv_suspend_mysql_thread_enter");
	}

	os_event_set(srv_lock_timeout_thread_event);

	mutex_enter(&kernel_mutex);

	trx->error_state = DB_SUCCESS;

	if (thr->state == QUE_THR_RUNNING) {

		ut_ad(thr->is_active == TRUE);

		/* The lock has already been released or this transaction
		was chosen as a deadlock victim: no need to suspend */

		if (trx->was_chosen_as_deadlock_victim) {

			trx->error_state = DB_DEADLOCK;
			trx->was_chosen_as_deadlock_victim = FALSE;
		}

		mutex_exit(&kernel_mutex);

		return;
	}

	ut_ad(thr->is_active == FALSE);

	slot = srv_table_reserve_slot_for_mysql();

	event = slot->event;

	slot->thr = thr;

	os_event_reset(event);

	slot->suspend_time = ut_time();

	if (thr->lock_state == QUE_THR_LOCK_ROW) {
		srv_n_lock_wait_count++;
		srv_n_lock_wait_current_count++;

		if (ut_usectime(&sec, &ms) == -1) {
			start_time = -1;
		} else {
			start_time = (ib_int64_t) sec * 1000000 + ms;
		}
	}

	/* Wake the lock timeout monitor thread, if it is suspended */

	os_event_set(srv_lock_timeout_thread_event);

	mutex_exit(&kernel_mutex);

	if (trx->declared_to_be_inside_innodb) {

		was_declared_inside_innodb = TRUE;

		/* We must declare this OS thread to exit InnoDB, since a
		possible other thread holding a lock which this thread waits
		for must be allowed to enter, sooner or later */

		srv_conc_force_exit_innodb(trx);
	}

	had_dict_lock = trx->dict_operation_lock_mode;

	switch (had_dict_lock) {
	case RW_S_LATCH:
		/* Release foreign key check latch */
		row_mysql_unfreeze_data_dictionary(trx);
		break;
	case RW_X_LATCH:
		/* Release fast index creation latch */
		row_mysql_unlock_data_dictionary(trx);
		break;
	}

	ut_a(trx->dict_operation_lock_mode == 0);

	/* Suspend this thread and wait for the event. */

	os_event_wait(event);

	/* After resuming, reacquire the data dictionary latch if
	necessary. */

	switch (had_dict_lock) {
	case RW_S_LATCH:
		row_mysql_freeze_data_dictionary(trx);
		break;
	case RW_X_LATCH:
		row_mysql_lock_data_dictionary(trx);
		break;
	}

	if (was_declared_inside_innodb) {

		/* Return back inside InnoDB */

		srv_conc_force_enter_innodb(trx);
	}

	mutex_enter(&kernel_mutex);

	/* Release the slot for others to use */

	slot->in_use = FALSE;

	wait_time = ut_difftime(ut_time(), slot->suspend_time);

	if (thr->lock_state == QUE_THR_LOCK_ROW) {
		if (ut_usectime(&sec, &ms) == -1) {
			finish_time = -1;
		} else {
			finish_time = (ib_int64_t) sec * 1000000 + ms;
		}

		diff_time = (ulint) (finish_time - start_time);

		srv_n_lock_wait_current_count--;
		srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time;
		if (diff_time > srv_n_lock_max_wait_time &&
		    /* only update the variable if we successfully
		    retrieved the start and finish times. See Bug#36819. */
		    start_time != -1 && finish_time != -1) {
			srv_n_lock_max_wait_time = diff_time;
		}
	}

	if (trx->was_chosen_as_deadlock_victim) {

		trx->error_state = DB_DEADLOCK;
		trx->was_chosen_as_deadlock_victim = FALSE;
	}

	mutex_exit(&kernel_mutex);

	/* InnoDB system transactions (such as the purge, and
	incomplete transactions that are being rolled back after crash
	recovery) will use the global value of
	innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */
	lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd);

	if (lock_wait_timeout < 100000000
	    && wait_time > (double) lock_wait_timeout) {

		trx->error_state = DB_LOCK_WAIT_TIMEOUT;
	}

	if (trx_is_interrupted(trx)) {

		trx->error_state = DB_INTERRUPTED;
	}
}

/********************************************************************//**
Releases a MySQL OS thread waiting for a lock to be released, if the
thread is already suspended. */
UNIV_INTERN
void
srv_release_mysql_thread_if_suspended(
/*==================================*/
	que_thr_t* thr)	/*!< in: query thread associated with the
			MySQL OS thread */
{
	srv_slot_t* slot;
	ulint i;

	ut_ad(mutex_own(&kernel_mutex));

	for (i = 0; i < OS_THREAD_MAX_N; i++) {

		slot = srv_mysql_table + i;

		if (slot->in_use && slot->thr == thr) {
			/* Found */

			os_event_set(slot->event);

			return;
		}
	}

	/* not found */
}

/******************************************************************//**
Refreshes the values used to calculate per-second averages. */
static
void
srv_refresh_innodb_monitor_stats(void)
/*==================================*/
{
	mutex_enter(&srv_innodb_monitor_mutex);

	srv_last_monitor_time = time(NULL);

	os_aio_refresh_stats();

	btr_cur_n_sea_old = btr_cur_n_sea;
	btr_cur_n_non_sea_old = btr_cur_n_non_sea;

	log_refresh_stats();

	buf_refresh_io_stats();

	srv_n_rows_inserted_old = srv_n_rows_inserted;
	srv_n_rows_updated_old = srv_n_rows_updated;
	srv_n_rows_deleted_old = srv_n_rows_deleted;
	srv_n_rows_read_old = srv_n_rows_read;

	mutex_exit(&srv_innodb_monitor_mutex);
}
1703 /******************************************************************//**
1704 Outputs to a file the output of the InnoDB Monitor.
1705 @return FALSE if not all information printed
1706 due to failure to obtain necessary mutex */
1707 UNIV_INTERN
1708 ibool
1709 srv_printf_innodb_monitor(
1710 /*======================*/
1711 FILE* file, /*!< in: output stream */
1712 ibool nowait, /*!< in: whether to wait for kernel mutex */
1713 ulint* trx_start, /*!< out: file position of the start of
1714 the list of active transactions */
1715 ulint* trx_end) /*!< out: file position of the end of
1716 the list of active transactions */
1718 double time_elapsed;
1719 time_t current_time;
1720 ulint n_reserved;
1721 ibool ret;
1723 mutex_enter(&srv_innodb_monitor_mutex);
1725 current_time = time(NULL);
1727 /* We add 0.001 seconds to time_elapsed to prevent division
1728 by zero if two users happen to call SHOW INNODB STATUS at the same
1729 time */
1731 time_elapsed = difftime(current_time, srv_last_monitor_time)
1732 + 0.001;
1734 srv_last_monitor_time = time(NULL);
1736 fputs("\n=====================================\n", file);
1738 ut_print_timestamp(file);
1739 fprintf(file,
1740 " INNODB MONITOR OUTPUT\n"
1741 "=====================================\n"
1742 "Per second averages calculated from the last %lu seconds\n",
1743 (ulong)time_elapsed);
1745 fputs("-----------------\n"
1746 "BACKGROUND THREAD\n"
1747 "-----------------\n", file);
1748 srv_print_master_thread_info(file);
1750 fputs("----------\n"
1751 "SEMAPHORES\n"
1752 "----------\n", file);
1753 sync_print(file);
1755 /* Conceptually, srv_innodb_monitor_mutex has a very high latching
1756 order level in sync0sync.h, while dict_foreign_err_mutex has a very
1757 low level, 135. Therefore we can reserve the latter mutex here
1758 without danger of a thread deadlock. */
1760 mutex_enter(&dict_foreign_err_mutex);
1762 if (ftell(dict_foreign_err_file) != 0L) {
1763 fputs("------------------------\n"
1764 "LATEST FOREIGN KEY ERROR\n"
1765 "------------------------\n", file);
1766 ut_copy_file(file, dict_foreign_err_file);
1769 mutex_exit(&dict_foreign_err_mutex);
1771 /* Only if lock_print_info_summary proceeds correctly do we
1772 call lock_print_info_all_transactions below to print all the
1773 lock information. */
1774 ret = lock_print_info_summary(file, nowait);
1776 if (ret) {
1777 if (trx_start) {
1778 long t = ftell(file);
1779 if (t < 0) {
1780 *trx_start = ULINT_UNDEFINED;
1781 } else {
1782 *trx_start = (ulint) t;
1785 lock_print_info_all_transactions(file);
1786 if (trx_end) {
1787 long t = ftell(file);
1788 if (t < 0) {
1789 *trx_end = ULINT_UNDEFINED;
1790 } else {
1791 *trx_end = (ulint) t;
1796 fputs("--------\n"
1797 "FILE I/O\n"
1798 "--------\n", file);
1799 os_aio_print(file);
1801 fputs("-------------------------------------\n"
1802 "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
1803 "-------------------------------------\n", file);
1804 ibuf_print(file);
1806 ha_print_info(file, btr_search_sys->hash_index);
1808 fprintf(file,
1809 "%.2f hash searches/s, %.2f non-hash searches/s\n",
1810 (btr_cur_n_sea - btr_cur_n_sea_old)
1811 / time_elapsed,
1812 (btr_cur_n_non_sea - btr_cur_n_non_sea_old)
1813 / time_elapsed);
1814 btr_cur_n_sea_old = btr_cur_n_sea;
1815 btr_cur_n_non_sea_old = btr_cur_n_non_sea;
1817 fputs("---\n"
1818 "LOG\n"
1819 "---\n", file);
1820 log_print(file);
1822 fputs("----------------------\n"
1823 "BUFFER POOL AND MEMORY\n"
1824 "----------------------\n", file);
1825 fprintf(file,
1826 "Total memory allocated " ULINTPF
1827 "; in additional pool allocated " ULINTPF "\n",
1828 ut_total_allocated_memory,
1829 mem_pool_get_reserved(mem_comm_pool));
1830 fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
1831 dict_sys->size);
1833 buf_print_io(file);
1835 fputs("--------------\n"
1836 "ROW OPERATIONS\n"
1837 "--------------\n", file);
1838 fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
1839 (long) srv_conc_n_threads,
1840 (ulong) srv_conc_n_waiting_threads);
1842 fprintf(file, "%lu read views open inside InnoDB\n",
1843 UT_LIST_GET_LEN(trx_sys->view_list));
1845 n_reserved = fil_space_get_n_reserved_extents(0);
1846 if (n_reserved > 0) {
1847 fprintf(file,
1848 "%lu tablespace extents now reserved for"
1849 " B-tree split operations\n",
1850 (ulong) n_reserved);
1853 #ifdef UNIV_LINUX
1854 fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
1855 (ulong) srv_main_thread_process_no,
1856 (ulong) srv_main_thread_id,
1857 srv_main_thread_op_info);
1858 #else
1859 fprintf(file, "Main thread id %lu, state: %s\n",
1860 (ulong) srv_main_thread_id,
1861 srv_main_thread_op_info);
1862 #endif
1863 fprintf(file,
1864 "Number of rows inserted " ULINTPF
1865 ", updated " ULINTPF ", deleted " ULINTPF
1866 ", read " ULINTPF "\n",
1867 srv_n_rows_inserted,
1868 srv_n_rows_updated,
1869 srv_n_rows_deleted,
1870 srv_n_rows_read);
1871 fprintf(file,
1872 "%.2f inserts/s, %.2f updates/s,"
1873 " %.2f deletes/s, %.2f reads/s\n",
1874 (srv_n_rows_inserted - srv_n_rows_inserted_old)
1875 / time_elapsed,
1876 (srv_n_rows_updated - srv_n_rows_updated_old)
1877 / time_elapsed,
1878 (srv_n_rows_deleted - srv_n_rows_deleted_old)
1879 / time_elapsed,
1880 (srv_n_rows_read - srv_n_rows_read_old)
1881 / time_elapsed);
1883 srv_n_rows_inserted_old = srv_n_rows_inserted;
1884 srv_n_rows_updated_old = srv_n_rows_updated;
1885 srv_n_rows_deleted_old = srv_n_rows_deleted;
1886 srv_n_rows_read_old = srv_n_rows_read;
1888 fputs("----------------------------\n"
1889 "END OF INNODB MONITOR OUTPUT\n"
1890 "============================\n", file);
1891 mutex_exit(&srv_innodb_monitor_mutex);
1892 fflush(file);
1894 return(ret);
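/* A sketch of how the function above brackets the transaction list
with file offsets: ftell() before and after the section, with an
"undefined" sentinel when the offset cannot be obtained, so a caller
can later seek back and re-read just that section. Disabled with
#if 0. */
#if 0
#include <stdio.h>

#define POS_UNDEFINED	((unsigned long) -1)	/* cf. ULINT_UNDEFINED */

static void
print_bracketed_section(FILE* file, unsigned long* start, unsigned long* end)
{
	long	t = ftell(file);

	*start = (t < 0) ? POS_UNDEFINED : (unsigned long) t;

	fputs("...section body...\n", file);

	t = ftell(file);
	*end = (t < 0) ? POS_UNDEFINED : (unsigned long) t;
}
#endif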
1897 /******************************************************************//**
1898 Function to pass InnoDB status variables to MySQL */
1899 UNIV_INTERN
1900 void
1901 srv_export_innodb_status(void)
1902 /*==========================*/
1904 mutex_enter(&srv_innodb_monitor_mutex);
1906 export_vars.innodb_data_pending_reads
1907 = os_n_pending_reads;
1908 export_vars.innodb_data_pending_writes
1909 = os_n_pending_writes;
1910 export_vars.innodb_data_pending_fsyncs
1911 = fil_n_pending_log_flushes
1912 + fil_n_pending_tablespace_flushes;
1913 export_vars.innodb_data_fsyncs = os_n_fsyncs;
1914 export_vars.innodb_data_read = srv_data_read;
1915 export_vars.innodb_data_reads = os_n_file_reads;
1916 export_vars.innodb_data_writes = os_n_file_writes;
1917 export_vars.innodb_data_written = srv_data_written;
1918 export_vars.innodb_buffer_pool_read_requests = buf_pool->stat.n_page_gets;
1919 export_vars.innodb_buffer_pool_write_requests
1920 = srv_buf_pool_write_requests;
1921 export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
1922 export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
1923 export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
1924 export_vars.innodb_buffer_pool_read_ahead_rnd
1925 = buf_pool->stat.n_ra_pages_read_rnd;
1926 export_vars.innodb_buffer_pool_read_ahead
1927 = buf_pool->stat.n_ra_pages_read;
1928 export_vars.innodb_buffer_pool_read_ahead_evicted
1929 = buf_pool->stat.n_ra_pages_evicted;
1930 export_vars.innodb_buffer_pool_pages_data
1931 = UT_LIST_GET_LEN(buf_pool->LRU);
1932 export_vars.innodb_buffer_pool_pages_dirty
1933 = UT_LIST_GET_LEN(buf_pool->flush_list);
1934 export_vars.innodb_buffer_pool_pages_free
1935 = UT_LIST_GET_LEN(buf_pool->free);
1936 #ifdef UNIV_DEBUG
1937 export_vars.innodb_buffer_pool_pages_latched
1938 = buf_get_latched_pages_number();
1939 #endif /* UNIV_DEBUG */
1940 export_vars.innodb_buffer_pool_pages_total = buf_pool->curr_size;
1942 export_vars.innodb_buffer_pool_pages_misc = buf_pool->curr_size
1943 - UT_LIST_GET_LEN(buf_pool->LRU)
1944 - UT_LIST_GET_LEN(buf_pool->free);
1945 #ifdef HAVE_ATOMIC_BUILTINS
1946 export_vars.innodb_have_atomic_builtins = 1;
1947 #else
1948 export_vars.innodb_have_atomic_builtins = 0;
1949 #endif
1950 export_vars.innodb_page_size = UNIV_PAGE_SIZE;
1951 export_vars.innodb_log_waits = srv_log_waits;
1952 export_vars.innodb_os_log_written = srv_os_log_written;
1953 export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
1954 export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
1955 export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
1956 export_vars.innodb_log_write_requests = srv_log_write_requests;
1957 export_vars.innodb_log_writes = srv_log_writes;
1958 export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
1959 export_vars.innodb_dblwr_writes = srv_dblwr_writes;
1960 export_vars.innodb_pages_created = buf_pool->stat.n_pages_created;
1961 export_vars.innodb_pages_read = buf_pool->stat.n_pages_read;
1962 export_vars.innodb_pages_written = buf_pool->stat.n_pages_written;
1963 export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
1964 export_vars.innodb_row_lock_current_waits
1965 = srv_n_lock_wait_current_count;
1966 export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000;
1967 if (srv_n_lock_wait_count > 0) {
1968 export_vars.innodb_row_lock_time_avg = (ulint)
1969 (srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
1970 } else {
1971 export_vars.innodb_row_lock_time_avg = 0;
1973 export_vars.innodb_row_lock_time_max
1974 = srv_n_lock_max_wait_time / 1000;
1975 export_vars.innodb_rows_read = srv_n_rows_read;
1976 export_vars.innodb_rows_inserted = srv_n_rows_inserted;
1977 export_vars.innodb_rows_updated = srv_n_rows_updated;
1978 export_vars.innodb_rows_deleted = srv_n_rows_deleted;
1980 #ifdef UNIV_DEBUG
1982 dulint done_trx_no;
1983 dulint up_limit_id;
1985 rw_lock_s_lock(&purge_sys->latch);
1986 done_trx_no = purge_sys->done_trx_no;
1987 up_limit_id = purge_sys->view
1988 ? purge_sys->view->up_limit_id
1989 : ut_dulint_zero;
1990 rw_lock_s_unlock(&purge_sys->latch);
1992 if (ut_dulint_cmp(trx_sys->max_trx_id, done_trx_no) < 0) {
1993 export_vars.innodb_purge_trx_id_age = 0;
1994 } else {
1995 export_vars.innodb_purge_trx_id_age = ut_dulint_minus(
1996 trx_sys->max_trx_id, done_trx_no);
1999 if (ut_dulint_is_zero(up_limit_id)
2000 || ut_dulint_cmp(trx_sys->max_trx_id, up_limit_id) < 0) {
2001 export_vars.innodb_purge_view_trx_id_age = 0;
2002 } else {
2003 export_vars.innodb_purge_view_trx_id_age =
2004 ut_dulint_minus(trx_sys->max_trx_id,
2005 up_limit_id);
2008 #endif /* UNIV_DEBUG */
2010 mutex_exit(&srv_innodb_monitor_mutex);
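/* A sketch of the innodb_row_lock_time_avg computation above: the
total wait time is kept in microseconds, exported in milliseconds,
and the average is guarded against a zero wait count. Disabled with
#if 0. */
#if 0
static unsigned long
row_lock_time_avg_ms(
	unsigned long long	total_wait_us,	/* cf. srv_n_lock_wait_time */
	unsigned long		n_waits)	/* cf. srv_n_lock_wait_count */
{
	if (n_waits == 0) {
		return(0);
	}

	return((unsigned long) (total_wait_us / 1000 / n_waits));
}
#endif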
2013 /*********************************************************************//**
2014 A thread which prints the info output by various InnoDB monitors.
2015 @return a dummy parameter */
2016 UNIV_INTERN
2017 os_thread_ret_t
2018 srv_monitor_thread(
2019 /*===============*/
2020 void* arg __attribute__((unused)))
2021 /*!< in: a dummy parameter required by
2022 os_thread_create */
2024 double time_elapsed;
2025 time_t current_time;
2026 time_t last_table_monitor_time;
2027 time_t last_tablespace_monitor_time;
2028 time_t last_monitor_time;
2029 ulint mutex_skipped;
2030 ibool last_srv_print_monitor;
2032 #ifdef UNIV_DEBUG_THREAD_CREATION
2033 fprintf(stderr, "Monitor thread starts, id %lu\n",
2034 os_thread_pf(os_thread_get_curr_id()));
2035 #endif
2036 UT_NOT_USED(arg);
2037 srv_last_monitor_time = time(NULL);
2038 last_table_monitor_time = time(NULL);
2039 last_tablespace_monitor_time = time(NULL);
2040 last_monitor_time = time(NULL);
2041 mutex_skipped = 0;
2042 last_srv_print_monitor = srv_print_innodb_monitor;
2043 loop:
2044 srv_monitor_active = TRUE;
2046 /* Wake up every 5 seconds to see if we need to print
2047 monitor information. */
2049 os_thread_sleep(5000000);
2051 current_time = time(NULL);
2053 time_elapsed = difftime(current_time, last_monitor_time);
2055 if (time_elapsed > 15) {
2056 last_monitor_time = time(NULL);
2058 if (srv_print_innodb_monitor) {
2059 /* Reset the mutex_skipped counter every time
2060 srv_print_innodb_monitor changes. This ensures
2061 that we will not be blocked by kernel_mutex
2062 for short-duration information printing, such
2063 as that requested by sync_array_print_long_waits(). */
2064 if (!last_srv_print_monitor) {
2065 mutex_skipped = 0;
2066 last_srv_print_monitor = TRUE;
2069 if (!srv_printf_innodb_monitor(stderr,
2070 MUTEX_NOWAIT(mutex_skipped),
2071 NULL, NULL)) {
2072 mutex_skipped++;
2073 } else {
2074 /* Reset the counter */
2075 mutex_skipped = 0;
2077 } else {
2078 last_srv_print_monitor = FALSE;
2082 if (srv_innodb_status) {
2083 mutex_enter(&srv_monitor_file_mutex);
2084 rewind(srv_monitor_file);
2085 if (!srv_printf_innodb_monitor(srv_monitor_file,
2086 MUTEX_NOWAIT(mutex_skipped),
2087 NULL, NULL)) {
2088 mutex_skipped++;
2089 } else {
2090 mutex_skipped = 0;
2093 os_file_set_eof(srv_monitor_file);
2094 mutex_exit(&srv_monitor_file_mutex);
2097 if (srv_print_innodb_tablespace_monitor
2098 && difftime(current_time,
2099 last_tablespace_monitor_time) > 60) {
2100 last_tablespace_monitor_time = time(NULL);
2102 fputs("========================"
2103 "========================\n",
2104 stderr);
2106 ut_print_timestamp(stderr);
2108 fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
2109 "========================"
2110 "========================\n",
2111 stderr);
2113 fsp_print(0);
2114 fputs("Validating tablespace\n", stderr);
2115 fsp_validate(0);
2116 fputs("Validation ok\n"
2117 "---------------------------------------\n"
2118 "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
2119 "=======================================\n",
2120 stderr);
2123 if (srv_print_innodb_table_monitor
2124 && difftime(current_time, last_table_monitor_time) > 60) {
2126 last_table_monitor_time = time(NULL);
2128 fputs("===========================================\n",
2129 stderr);
2131 ut_print_timestamp(stderr);
2133 fputs(" INNODB TABLE MONITOR OUTPUT\n"
2134 "===========================================\n",
2135 stderr);
2136 dict_print();
2138 fputs("-----------------------------------\n"
2139 "END OF INNODB TABLE MONITOR OUTPUT\n"
2140 "==================================\n",
2141 stderr);
2145 if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
2146 goto exit_func;
2149 if (srv_print_innodb_monitor
2150 || srv_print_innodb_lock_monitor
2151 || srv_print_innodb_tablespace_monitor
2152 || srv_print_innodb_table_monitor) {
2153 goto loop;
2156 srv_monitor_active = FALSE;
2158 goto loop;
2160 exit_func:
2161 srv_monitor_active = FALSE;
2163 /* We count the number of threads in os_thread_exit(). A created
2164 thread should always use that to exit and not use return() to exit. */
2166 os_thread_exit(NULL);
2168 OS_THREAD_DUMMY_RETURN;
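/* A sketch of the mutex_skipped back-off driving MUTEX_NOWAIT()
above: try a non-blocking acquisition first and count failures; only
after repeated failures fall back to a blocking lock so the monitor
output is eventually produced. The threshold of 20 and the pthread
primitives are assumptions for the sketch. Disabled with #if 0. */
#if 0
#include <pthread.h>

#define MAX_SKIPS	20

/* Returns 0 when the mutex was acquired, -1 when this round was
skipped. A blocking lock is used once we have already skipped
MAX_SKIPS times in a row. */
static int
lock_with_backoff(pthread_mutex_t* mutex, unsigned* skipped)
{
	if (*skipped < MAX_SKIPS) {
		if (pthread_mutex_trylock(mutex) != 0) {
			(*skipped)++;	/* busy: give up this round */
			return(-1);
		}
	} else {
		pthread_mutex_lock(mutex);
	}

	*skipped = 0;		/* reset on success */
	return(0);
}
#endif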
2171 /*********************************************************************//**
2172 A thread which wakes up threads whose lock wait may have lasted too long.
2173 @return a dummy parameter */
2174 UNIV_INTERN
2175 os_thread_ret_t
2176 srv_lock_timeout_thread(
2177 /*====================*/
2178 void* arg __attribute__((unused)))
2179 /*!< in: a dummy parameter required by
2180 os_thread_create */
2182 srv_slot_t* slot;
2183 ibool some_waits;
2184 double wait_time;
2185 ulint i;
2187 loop:
2188 /* When someone is waiting for a lock, we wake up every second
2189 and check whether a timeout has passed for any lock wait */
2191 os_thread_sleep(1000000);
2193 srv_lock_timeout_active = TRUE;
2195 mutex_enter(&kernel_mutex);
2197 some_waits = FALSE;
2199 /* Check all slots to see whether a thread is waiting there,
2200 and if so, whether it has exceeded the time limit. */
2202 for (i = 0; i < OS_THREAD_MAX_N; i++) {
2204 slot = srv_mysql_table + i;
2206 if (slot->in_use) {
2207 trx_t* trx;
2208 ulong lock_wait_timeout;
2210 some_waits = TRUE;
2212 wait_time = ut_difftime(ut_time(), slot->suspend_time);
2214 trx = thr_get_trx(slot->thr);
2215 lock_wait_timeout = thd_lock_wait_timeout(
2216 trx->mysql_thd);
2218 if (trx_is_interrupted(trx)
2219 || (lock_wait_timeout < 100000000
2220 && (wait_time > (double) lock_wait_timeout
2221 || wait_time < 0))) {
2223 /* The timeout was exceeded or the system time
2224 counter wrapped around: cancel the lock request
2225 queued by the transaction, and release any other
2226 transactions waiting behind it. It is possible
2227 that the lock has already been granted; in that
2228 case do nothing. */
2230 if (trx->wait_lock) {
2231 lock_cancel_waiting_and_release(
2232 trx->wait_lock);
2238 os_event_reset(srv_lock_timeout_thread_event);
2240 mutex_exit(&kernel_mutex);
2242 if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
2243 goto exit_func;
2246 if (some_waits) {
2247 goto loop;
2250 srv_lock_timeout_active = FALSE;
2252 #if 0
2253 /* The following synchronisation is disabled, since
2254 the InnoDB monitor output is to be updated every 15 seconds. */
2255 os_event_wait(srv_lock_timeout_thread_event);
2256 #endif
2257 goto loop;
2259 exit_func:
2260 srv_lock_timeout_active = FALSE;
2262 /* We count the number of threads in os_thread_exit(). A created
2263 thread should always use that to exit and not use return() to exit. */
2265 os_thread_exit(NULL);
2267 OS_THREAD_DUMMY_RETURN;
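/* A sketch of the per-slot timeout test in the loop above: a wait is
cancelled when it exceeds the session's lock wait timeout, or when
the measured wait is negative (the system clock stepped backwards).
Values >= 100000000 act as "timeout disabled", mirroring the
convention used above. Disabled with #if 0. */
#if 0
static int
wait_timed_out(double wait_time, unsigned long timeout_s)
{
	if (timeout_s >= 100000000) {
		return(0);	/* effectively infinite timeout */
	}

	return(wait_time > (double) timeout_s || wait_time < 0);
}
#endif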
2270 /*********************************************************************//**
2271 A thread which prints warnings about semaphore waits which have lasted
2272 too long. These can be used to track bugs which cause hangs.
2273 @return a dummy parameter */
2274 UNIV_INTERN
2275 os_thread_ret_t
2276 srv_error_monitor_thread(
2277 /*=====================*/
2278 void* arg __attribute__((unused)))
2279 /*!< in: a dummy parameter required by
2280 os_thread_create */
2282 /* number of successive fatal timeouts observed */
2283 ulint fatal_cnt = 0;
2284 ib_uint64_t old_lsn;
2285 ib_uint64_t new_lsn;
2286 /* longest waiting thread for a semaphore */
2287 os_thread_id_t waiter = os_thread_get_curr_id();
2288 os_thread_id_t old_waiter = waiter;
2289 /* the semaphore that is being waited for */
2290 const void* sema = NULL;
2291 const void* old_sema = NULL;
2293 old_lsn = srv_start_lsn;
2295 #ifdef UNIV_DEBUG_THREAD_CREATION
2296 fprintf(stderr, "Error monitor thread starts, id %lu\n",
2297 os_thread_pf(os_thread_get_curr_id()));
2298 #endif
2299 loop:
2300 srv_error_monitor_active = TRUE;
2302 /* Try to track a strange bug reported by Harald Fuchs and others,
2303 where the lsn seems to decrease at times */
2305 new_lsn = log_get_lsn();
2307 if (new_lsn < old_lsn) {
2308 ut_print_timestamp(stderr);
2309 fprintf(stderr,
2310 " InnoDB: Error: old log sequence number %llu"
2311 " was greater\n"
2312 "InnoDB: than the new log sequence number %llu!\n"
2313 "InnoDB: Please submit a bug report"
2314 " to http://bugs.mysql.com\n",
2315 old_lsn, new_lsn);
2318 old_lsn = new_lsn;
2320 if (difftime(time(NULL), srv_last_monitor_time) > 60) {
2321 /* We refresh the InnoDB Monitor values so that the averages
2322 are printed from at most the last 60 seconds. */
2324 srv_refresh_innodb_monitor_stats();
2327 /* Update the statistics collected for deciding LRU
2328 eviction policy. */
2329 buf_LRU_stat_update();
2331 /* Update the statistics collected for flush rate policy. */
2332 buf_flush_stat_update();
2334 /* In case mutex_exit is not a memory barrier, it is
2335 theoretically possible that some threads are left waiting even
2336 though the semaphore is already released. Wake up those threads: */
2338 sync_arr_wake_threads_if_sema_free();
2340 if (sync_array_print_long_waits(&waiter, &sema)
2341 && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
2342 fatal_cnt++;
2343 if (fatal_cnt > 10) {
2345 fprintf(stderr,
2346 "InnoDB: Error: semaphore wait has lasted"
2347 " > %lu seconds\n"
2348 "InnoDB: We intentionally crash the server,"
2349 " because it appears to be hung.\n",
2350 (ulong) srv_fatal_semaphore_wait_threshold);
2352 ut_error;
2354 } else {
2355 fatal_cnt = 0;
2356 old_waiter = waiter;
2357 old_sema = sema;
2360 /* Flush stderr so that a database user gets the output
2361 to the MySQL error log file, if one is configured */
2363 fflush(stderr);
2365 os_thread_sleep(1000000);
2367 if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
2369 goto loop;
2372 srv_error_monitor_active = FALSE;
2374 /* We count the number of threads in os_thread_exit(). A created
2375 thread should always use that to exit and not use return() to exit. */
2377 os_thread_exit(NULL);
2379 OS_THREAD_DUMMY_RETURN;
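/* A sketch of the watchdog logic above: the fatal counter only grows
while the *same* waiter is stuck on the *same* semaphore across
consecutive checks; any change resets it, and crossing the threshold
deliberately aborts the server. Simplified types stand in for
os_thread_id_t. Disabled with #if 0. */
#if 0
#include <stdlib.h>

static unsigned		fatal_cnt;
static const void*	old_sema;
static unsigned long	old_waiter;

static void
watchdog_tick(int long_wait_seen, const void* sema, unsigned long waiter)
{
	if (long_wait_seen && sema == old_sema && waiter == old_waiter) {
		if (++fatal_cnt > 10) {
			abort();	/* cf. ut_error: assume a hang */
		}
	} else {
		fatal_cnt = 0;
		old_sema = sema;
		old_waiter = waiter;
	}
}
#endif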
2382 /*******************************************************************//**
2383 Tells the InnoDB server that there has been activity in the database
2384 and wakes up the master thread if it is suspended (not sleeping). Used
2385 in the MySQL interface. Note that there is a small chance that the master
2386 thread stays suspended (we do not protect our operation with the kernel
2387 mutex, for performance reasons). */
2388 UNIV_INTERN
2389 void
2390 srv_active_wake_master_thread(void)
2391 /*===============================*/
2393 srv_activity_count++;
2395 if (srv_n_threads_active[SRV_MASTER] == 0) {
2397 mutex_enter(&kernel_mutex);
2399 srv_release_threads(SRV_MASTER, 1);
2401 mutex_exit(&kernel_mutex);
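/* A sketch of the optimistic wake-up above: bump the activity
counter, then check the active-thread count without holding the
mutex; only on the hit path is the mutex taken to signal. As the
comment above notes, the unlocked check leaves a small chance that
the master thread stays suspended. Disabled with #if 0. */
#if 0
#include <pthread.h>

static volatile unsigned long	activity_count;
static volatile int		n_master_active;
static pthread_mutex_t		kernel_mutex_sketch
					= PTHREAD_MUTEX_INITIALIZER;

static void
active_wake_master(void (*release_master)(void))
{
	activity_count++;

	if (n_master_active == 0) {	/* deliberately unlocked check */
		pthread_mutex_lock(&kernel_mutex_sketch);
		release_master();
		pthread_mutex_unlock(&kernel_mutex_sketch);
	}
}
#endif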
2405 /*******************************************************************//**
2406 Wakes up the master thread if it is suspended or being suspended. */
2407 UNIV_INTERN
2408 void
2409 srv_wake_master_thread(void)
2410 /*========================*/
2412 srv_activity_count++;
2414 mutex_enter(&kernel_mutex);
2416 srv_release_threads(SRV_MASTER, 1);
2418 mutex_exit(&kernel_mutex);
2421 /******************************************************************//**
2422 The master thread is tasked to ensure that a flush of the log file
2423 happens once every second in the background. This is to ensure that
2424 not more than one second of transactions is lost in case of a crash
2425 when innodb_flush_log_at_trx_commit != 1. */
2426 static
2427 void
2428 srv_sync_log_buffer_in_background(void)
2429 /*===================================*/
2431 time_t current_time = time(NULL);
2433 srv_main_thread_op_info = "flushing log";
2434 if (difftime(current_time, srv_last_log_flush_time) >= 1) {
2435 log_buffer_sync_in_background(TRUE);
2436 srv_last_log_flush_time = current_time;
2437 srv_log_writes_and_flush++;
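/* A sketch of the once-per-second rate limiting above: the flush is
performed only when at least a second has passed since the previous
one, batching background log syncs. Disabled with #if 0. */
#if 0
#include <time.h>

static time_t	last_flush_time;	/* cf. srv_last_log_flush_time */

static void
sync_log_in_background(void (*flush_fn)(void))
{
	time_t	now = time(NULL);

	if (difftime(now, last_flush_time) >= 1) {
		flush_fn();
		last_flush_time = now;
	}
}
#endif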
2441 /*********************************************************************//**
2442 The master thread controlling the server.
2443 @return a dummy parameter */
2444 UNIV_INTERN
2445 os_thread_ret_t
2446 srv_master_thread(
2447 /*==============*/
2448 void* arg __attribute__((unused)))
2449 /*!< in: a dummy parameter required by
2450 os_thread_create */
2452 os_event_t event;
2453 ulint old_activity_count;
2454 ulint n_pages_purged = 0;
2455 ulint n_bytes_merged;
2456 ulint n_pages_flushed;
2457 ulint n_bytes_archived;
2458 ulint n_tables_to_drop;
2459 ulint n_ios;
2460 ulint n_ios_old;
2461 ulint n_ios_very_old;
2462 ulint n_pend_ios;
2463 ibool skip_sleep = FALSE;
2464 ulint i;
2466 #ifdef UNIV_DEBUG_THREAD_CREATION
2467 fprintf(stderr, "Master thread starts, id %lu\n",
2468 os_thread_pf(os_thread_get_curr_id()));
2469 #endif
2470 srv_main_thread_process_no = os_proc_get_number();
2471 srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
2473 srv_table_reserve_slot(SRV_MASTER);
2475 mutex_enter(&kernel_mutex);
2477 srv_n_threads_active[SRV_MASTER]++;
2479 mutex_exit(&kernel_mutex);
2481 loop:
2482 /*****************************************************************/
2483 /* ---- When there is database activity by users, we cycle in this
2484 loop */
2486 srv_main_thread_op_info = "reserving kernel mutex";
2488 n_ios_very_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read
2489 + buf_pool->stat.n_pages_written;
2490 mutex_enter(&kernel_mutex);
2492 /* Store the user activity counter at the start of this loop */
2493 old_activity_count = srv_activity_count;
2495 mutex_exit(&kernel_mutex);
2497 if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
2499 goto suspend_thread;
2502 /* ---- We run the following loop approximately once per second
2503 when there is database activity */
2505 srv_last_log_flush_time = time(NULL);
2506 skip_sleep = FALSE;
2508 for (i = 0; i < 10; i++) {
2509 n_ios_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read
2510 + buf_pool->stat.n_pages_written;
2511 srv_main_thread_op_info = "sleeping";
2512 srv_main_1_second_loops++;
2514 #ifdef UNIV_DEBUG
2515 if (btr_cur_limit_optimistic_insert_debug) {
2516 /* If btr_cur_limit_optimistic_insert_debug is enabled
2517 and there are no purge threads, the purge opportunity
2518 is increased 100-fold (one purge per 100 ms) to speed
2519 up debug scripts that wait for purge to complete. */
2521 if (!skip_sleep && !srv_shutdown_state) {
2522 os_thread_sleep(100000);
2523 srv_main_sleeps++;
2526 do {
2527 if (srv_fast_shutdown
2528 && srv_shutdown_state > 0) {
2529 goto background_loop;
2532 srv_main_thread_op_info = "purging";
2533 n_pages_purged = trx_purge();
2535 } while (n_pages_purged);
2536 } else
2537 #endif /* UNIV_DEBUG */
2538 if (!skip_sleep && !srv_shutdown_state) {
2540 os_thread_sleep(1000000);
2541 srv_main_sleeps++;
2544 skip_sleep = FALSE;
2546 /* On Unix, ALTER TABLE in MySQL requires that the table
2547 handler can drop tables lazily after there no longer are
2548 SELECT queries on them. */
2550 srv_main_thread_op_info = "doing background drop tables";
2552 row_drop_tables_for_mysql_in_background();
2554 srv_main_thread_op_info = "";
2556 if (srv_fast_shutdown && srv_shutdown_state > 0) {
2558 goto background_loop;
2561 /* Flush logs if needed */
2562 srv_sync_log_buffer_in_background();
2564 srv_main_thread_op_info = "making checkpoint";
2565 log_free_check();
2567 /* If i/os during one second sleep were less than 5% of
2568 capacity, we assume that there is free disk i/o capacity
2569 available, and it makes sense to do an insert buffer merge. */
2571 n_pend_ios = buf_get_n_pending_ios()
2572 + log_sys->n_pending_writes;
2573 n_ios = log_sys->n_log_ios + buf_pool->stat.n_pages_read
2574 + buf_pool->stat.n_pages_written;
2575 if (n_pend_ios < SRV_PEND_IO_THRESHOLD
2576 && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
2577 srv_main_thread_op_info = "doing insert buffer merge";
2578 ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
2580 /* Flush logs if needed */
2581 srv_sync_log_buffer_in_background();
2584 if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
2585 > srv_max_buf_pool_modified_pct)) {
2587 /* Try to keep the number of modified pages in the
2588 buffer pool under the limit wished by the user */
2590 srv_main_thread_op_info =
2591 "flushing buffer pool pages";
2592 n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
2593 PCT_IO(100),
2594 IB_ULONGLONG_MAX);
2596 /* If we had to do the flush, it may have taken
2597 even more than 1 second, and also, there may be more
2598 to flush. Do not sleep 1 second during the next
2599 iteration of this loop. */
2601 skip_sleep = TRUE;
2602 } else if (srv_adaptive_flushing) {
2604 /* Try to keep the rate of flushing of dirty
2605 pages such that redo log generation does not
2606 produce bursts of IO at checkpoint time. */
2607 ulint n_flush = buf_flush_get_desired_flush_rate();
2609 if (n_flush) {
2610 srv_main_thread_op_info =
2611 "flushing buffer pool pages";
2612 n_flush = ut_min(PCT_IO(100), n_flush);
2613 n_pages_flushed =
2614 buf_flush_batch(
2615 BUF_FLUSH_LIST,
2616 n_flush,
2617 IB_ULONGLONG_MAX);
2619 if (n_flush == PCT_IO(100)) {
2620 skip_sleep = TRUE;
2625 if (srv_activity_count == old_activity_count) {
2627 /* There is no user activity at the moment, go to
2628 the background loop */
2630 goto background_loop;
2634 /* ---- We execute the following code approximately once every
2635 10 seconds when there is database activity */
2637 #ifdef MEM_PERIODIC_CHECK
2638 /* Check the magic numbers of every allocated mem block once
2639 every 10 seconds */
2640 mem_validate_all_blocks();
2641 #endif
2642 /* If i/os during the 10 second period were less than 200% of
2643 capacity, we assume that there is free disk i/o capacity
2644 available, and it makes sense to flush srv_io_capacity pages.
2646 Note that this is done regardless of the fraction of dirty
2647 pages relative to the max requested by the user. The one second
2648 loop above requests writes for that case. The writes done here
2649 are not required, and may be disabled. */
2651 n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
2652 n_ios = log_sys->n_log_ios + buf_pool->stat.n_pages_read
2653 + buf_pool->stat.n_pages_written;
2655 srv_main_10_second_loops++;
2656 if (n_pend_ios < SRV_PEND_IO_THRESHOLD
2657 && (n_ios - n_ios_very_old < SRV_PAST_IO_ACTIVITY)) {
2659 srv_main_thread_op_info = "flushing buffer pool pages";
2660 buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100),
2661 IB_ULONGLONG_MAX);
2663 /* Flush logs if needed */
2664 srv_sync_log_buffer_in_background();
2667 /* We run a batch of insert buffer merge every 10 seconds,
2668 even if the server is active. */
2670 srv_main_thread_op_info = "doing insert buffer merge";
2671 ibuf_contract_for_n_pages(FALSE, PCT_IO(5));
2673 /* Flush logs if needed */
2674 srv_sync_log_buffer_in_background();
2676 /* We run a full purge every 10 seconds, even if the server
2677 is active. */
2678 do {
2680 if (srv_fast_shutdown && srv_shutdown_state > 0) {
2682 goto background_loop;
2685 srv_main_thread_op_info = "purging";
2686 n_pages_purged = trx_purge();
2688 /* Flush logs if needed */
2689 srv_sync_log_buffer_in_background();
2691 } while (n_pages_purged);
2693 srv_main_thread_op_info = "flushing buffer pool pages";
2695 /* Flush a few oldest pages to make a new checkpoint younger */
2697 if (buf_get_modified_ratio_pct() > 70) {
2699 /* If there are lots of modified pages in the buffer pool
2700 (> 70 %), we assume we can afford reserving the disk(s) for
2701 the time it requires to flush 100 pages */
2703 n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
2704 PCT_IO(100),
2705 IB_ULONGLONG_MAX);
2706 } else {
2707 /* Otherwise, we only flush a small number of pages so that
2708 we do not unnecessarily take disk i/o capacity away from
2709 other work. */
2711 n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
2712 PCT_IO(10),
2713 IB_ULONGLONG_MAX);
2716 srv_main_thread_op_info = "making checkpoint";
2718 /* Make a new checkpoint about once in 10 seconds */
2720 log_checkpoint(TRUE, FALSE);
2722 srv_main_thread_op_info = "reserving kernel mutex";
2724 mutex_enter(&kernel_mutex);
2726 /* ---- When there is database activity, we jump from here back to
2727 the start of loop */
2729 if (srv_activity_count != old_activity_count) {
2730 mutex_exit(&kernel_mutex);
2731 goto loop;
2734 mutex_exit(&kernel_mutex);
2736 /* If the database is quiet, we enter the background loop */
2738 /*****************************************************************/
2739 background_loop:
2740 /* ---- In this loop we run background operations when the server
2741 is quiet from user activity. Also in the case of a shutdown, we
2742 loop here, flushing the buffer pool to the data files. */
2744 /* The server has been quiet for a while: start running background
2745 operations */
2746 srv_main_background_loops++;
2747 srv_main_thread_op_info = "doing background drop tables";
2749 n_tables_to_drop = row_drop_tables_for_mysql_in_background();
2751 if (n_tables_to_drop > 0) {
2752 /* Do not monopolize the CPU even if there are tables waiting
2753 in the background drop queue. (It is essentially a bug if
2754 MySQL tries to drop a table while there are still open handles
2755 to it and we had to put it into the background drop queue.) */
2757 os_thread_sleep(100000);
2760 srv_main_thread_op_info = "purging";
2762 /* Run a full purge */
2763 do {
2764 if (srv_fast_shutdown && srv_shutdown_state > 0) {
2766 break;
2769 srv_main_thread_op_info = "purging";
2770 n_pages_purged = trx_purge();
2772 /* Flush logs if needed */
2773 srv_sync_log_buffer_in_background();
2775 } while (n_pages_purged);
2777 srv_main_thread_op_info = "reserving kernel mutex";
2779 mutex_enter(&kernel_mutex);
2780 if (srv_activity_count != old_activity_count) {
2781 mutex_exit(&kernel_mutex);
2782 goto loop;
2784 mutex_exit(&kernel_mutex);
2786 srv_main_thread_op_info = "doing insert buffer merge";
2788 if (srv_fast_shutdown && srv_shutdown_state > 0) {
2789 n_bytes_merged = 0;
2790 } else {
2791 /* This should do an amount of IO similar to the number of
2792 dirty pages that will be flushed in the call to
2793 buf_flush_batch below. Otherwise, the system favors
2794 clean pages over cleanup throughput. */
2795 n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
2796 PCT_IO(100));
2799 srv_main_thread_op_info = "reserving kernel mutex";
2801 mutex_enter(&kernel_mutex);
2802 if (srv_activity_count != old_activity_count) {
2803 mutex_exit(&kernel_mutex);
2804 goto loop;
2806 mutex_exit(&kernel_mutex);
2808 flush_loop:
2809 srv_main_thread_op_info = "flushing buffer pool pages";
2810 srv_main_flush_loops++;
2811 if (srv_fast_shutdown < 2) {
2812 n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
2813 PCT_IO(100),
2814 IB_ULONGLONG_MAX);
2815 } else {
2816 /* In the fastest shutdown we do not flush the buffer pool
2817 to data files: we set n_pages_flushed to 0 artificially. */
2819 n_pages_flushed = 0;
2822 srv_main_thread_op_info = "reserving kernel mutex";
2824 mutex_enter(&kernel_mutex);
2825 if (srv_activity_count != old_activity_count) {
2826 mutex_exit(&kernel_mutex);
2827 goto loop;
2829 mutex_exit(&kernel_mutex);
2831 srv_main_thread_op_info = "waiting for buffer pool flush to end";
2832 buf_flush_wait_batch_end(BUF_FLUSH_LIST);
2834 /* Flush logs if needed */
2835 srv_sync_log_buffer_in_background();
2837 srv_main_thread_op_info = "making checkpoint";
2839 log_checkpoint(TRUE, FALSE);
2841 if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {
2843 /* Try to keep the number of modified pages in the
2844 buffer pool under the limit wished by the user */
2846 goto flush_loop;
2849 srv_main_thread_op_info = "reserving kernel mutex";
2851 mutex_enter(&kernel_mutex);
2852 if (srv_activity_count != old_activity_count) {
2853 mutex_exit(&kernel_mutex);
2854 goto loop;
2856 mutex_exit(&kernel_mutex);
2858 srv_main_thread_op_info = "archiving log (if log archive is on)";
2860 log_archive_do(FALSE, &n_bytes_archived);
2862 n_bytes_archived = 0;
2864 /* Keep looping in the background loop if there is still work to do */
2866 if (srv_fast_shutdown && srv_shutdown_state > 0) {
2867 if (n_tables_to_drop + n_pages_flushed
2868 + n_bytes_archived != 0) {
2870 /* If we are doing a fast shutdown (= the default)
2871 we do not do purge or insert buffer merge. But we
2872 flush the buffer pool completely to disk.
2873 In a 'very fast' shutdown we do not flush the buffer
2874 pool to data files: we have set n_pages_flushed to
2875 0 artificially. */
2877 goto background_loop;
2879 } else if (n_tables_to_drop
2880 + n_pages_purged + n_bytes_merged + n_pages_flushed
2881 + n_bytes_archived != 0) {
2882 /* In a 'slow' shutdown we run purge and the insert buffer
2883 merge to completion */
2885 goto background_loop;
2888 /* There is no work for background operations either: suspend
2889 master thread to wait for more server activity */
2891 suspend_thread:
2892 srv_main_thread_op_info = "suspending";
2894 mutex_enter(&kernel_mutex);
2896 if (row_get_background_drop_list_len_low() > 0) {
2897 mutex_exit(&kernel_mutex);
2899 goto loop;
2902 event = srv_suspend_thread();
2904 mutex_exit(&kernel_mutex);
2906 /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
2907 waits for database activity to die down when converting < 4.1.x
2908 databases, and relies on this string being exactly as it is. InnoDB
2909 manual also mentions this string in several places. */
2910 srv_main_thread_op_info = "waiting for server activity";
2912 os_event_wait(event);
2914 if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
2915 /* This is only an extra safety measure; the thread
2916 should already exit when the event wait ends */
2918 os_thread_exit(NULL);
2921 /* When there is user activity, InnoDB will set the event and the
2922 main thread goes back to loop. */
2924 goto loop;
2926 OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
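/* A sketch of the control pattern that structures srv_master_thread()
above: snapshot the activity counter on entry, run one unit of
background work, then re-check under the mutex; any user activity
since the snapshot sends control back to the active loop, otherwise
the thread suspends. Disabled with #if 0. */
#if 0
#include <pthread.h>

static unsigned long	activity_count;
static pthread_mutex_t	kernel_mutex_sketch = PTHREAD_MUTEX_INITIALIZER;

static void
master_loop(void (*background_step)(void), void (*suspend)(void))
{
	unsigned long	old_activity;

	for (;;) {
		pthread_mutex_lock(&kernel_mutex_sketch);
		old_activity = activity_count;
		pthread_mutex_unlock(&kernel_mutex_sketch);

		background_step();

		pthread_mutex_lock(&kernel_mutex_sketch);
		if (activity_count != old_activity) {
			pthread_mutex_unlock(&kernel_mutex_sketch);
			continue;	/* activity: back to active loop */
		}
		pthread_mutex_unlock(&kernel_mutex_sketch);

		suspend();		/* quiet: wait for the next wake-up */
	}
}
#endif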