mySQL 5.0.11 sources for tomato
[tomato.git] / release / src / router / mysql / storage / innodb_plugin / trx / trx0roll.c
blob4aae1aa4e8723e4e50196f9ca01fda82406d8074
1 /*****************************************************************************
3 Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc.,
15 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 *****************************************************************************/
19 /**************************************************//**
20 @file trx/trx0roll.c
21 Transaction rollback
23 Created 3/26/1996 Heikki Tuuri
24 *******************************************************/
26 #include "trx0roll.h"
28 #ifdef UNIV_NONINL
29 #include "trx0roll.ic"
30 #endif
32 #include "fsp0fsp.h"
33 #include "mach0data.h"
34 #include "trx0rseg.h"
35 #include "trx0trx.h"
36 #include "trx0undo.h"
37 #include "trx0rec.h"
38 #include "que0que.h"
39 #include "usr0sess.h"
40 #include "srv0que.h"
41 #include "srv0start.h"
42 #include "row0undo.h"
43 #include "row0mysql.h"
44 #include "lock0lock.h"
45 #include "pars0pars.h"
47 /** This many pages must be undone before a truncate is tried within
48 rollback */
49 #define TRX_ROLL_TRUNC_THRESHOLD 1
51 /** In crash recovery, the current trx to be rolled back; NULL otherwise */
52 static const trx_t* trx_roll_crash_recv_trx = NULL;
54 /** In crash recovery we set this to the undo n:o of the current trx to be
55 rolled back. Then we can print how many % the rollback has progressed. */
56 static ib_int64_t trx_roll_max_undo_no;
58 /** Auxiliary variable which tells the previous progress % we printed */
59 static ulint trx_roll_progress_printed_pct;
61 /*******************************************************************//**
62 Rollback a transaction used in MySQL.
63 @return error code or DB_SUCCESS */
64 UNIV_INTERN
65 int
66 trx_general_rollback_for_mysql(
67 /*===========================*/
68 trx_t* trx, /*!< in: transaction handle */
69 trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if
70 partial rollback requested, or NULL for
71 complete rollback */
73 mem_heap_t* heap;
74 que_thr_t* thr;
75 roll_node_t* roll_node;
77 /* Tell Innobase server that there might be work for
78 utility threads: */
80 srv_active_wake_master_thread();
82 trx_start_if_not_started(trx);
84 heap = mem_heap_create(512);
86 roll_node = roll_node_create(heap);
88 if (savept) {
89 roll_node->partial = TRUE;
90 roll_node->savept = *savept;
93 trx->error_state = DB_SUCCESS;
95 thr = pars_complete_graph_for_exec(roll_node, trx, heap);
97 ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
98 que_run_threads(thr);
100 mutex_enter(&kernel_mutex);
102 while (trx->que_state != TRX_QUE_RUNNING) {
104 mutex_exit(&kernel_mutex);
106 os_thread_sleep(100000);
108 mutex_enter(&kernel_mutex);
111 mutex_exit(&kernel_mutex);
113 mem_heap_free(heap);
115 ut_a(trx->error_state == DB_SUCCESS);
117 /* Tell Innobase server that there might be work for
118 utility threads: */
120 srv_active_wake_master_thread();
122 return((int) trx->error_state);
125 /*******************************************************************//**
126 Rollback a transaction used in MySQL.
127 @return error code or DB_SUCCESS */
128 UNIV_INTERN
130 trx_rollback_for_mysql(
131 /*===================*/
132 trx_t* trx) /*!< in: transaction handle */
134 int err;
136 if (trx->conc_state == TRX_NOT_STARTED) {
138 return(DB_SUCCESS);
141 trx->op_info = "rollback";
143 /* If we are doing the XA recovery of prepared transactions, then
144 the transaction object does not have an InnoDB session object, and we
145 set a dummy session that we use for all MySQL transactions. */
147 err = trx_general_rollback_for_mysql(trx, NULL);
149 trx->op_info = "";
151 return(err);
154 /*******************************************************************//**
155 Rollback the latest SQL statement for MySQL.
156 @return error code or DB_SUCCESS */
157 UNIV_INTERN
159 trx_rollback_last_sql_stat_for_mysql(
160 /*=================================*/
161 trx_t* trx) /*!< in: transaction handle */
163 int err;
165 if (trx->conc_state == TRX_NOT_STARTED) {
167 return(DB_SUCCESS);
170 trx->op_info = "rollback of SQL statement";
172 err = trx_general_rollback_for_mysql(trx, &trx->last_sql_stat_start);
173 /* The following call should not be needed, but we play safe: */
174 trx_mark_sql_stat_end(trx);
176 trx->op_info = "";
178 return(err);
181 /*******************************************************************//**
182 Frees a single savepoint struct. */
183 UNIV_INTERN
184 void
185 trx_roll_savepoint_free(
186 /*=====================*/
187 trx_t* trx, /*!< in: transaction handle */
188 trx_named_savept_t* savep) /*!< in: savepoint to free */
190 ut_a(savep != NULL);
191 ut_a(UT_LIST_GET_LEN(trx->trx_savepoints) > 0);
193 UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
194 mem_free(savep->name);
195 mem_free(savep);
198 /*******************************************************************//**
199 Frees savepoint structs starting from savep, if savep == NULL then
200 free all savepoints. */
201 UNIV_INTERN
202 void
203 trx_roll_savepoints_free(
204 /*=====================*/
205 trx_t* trx, /*!< in: transaction handle */
206 trx_named_savept_t* savep) /*!< in: free all savepoints > this one;
207 if this is NULL, free all savepoints
208 of trx */
210 trx_named_savept_t* next_savep;
212 if (savep == NULL) {
213 savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
214 } else {
215 savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
218 while (savep != NULL) {
219 next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
221 trx_roll_savepoint_free(trx, savep);
223 savep = next_savep;
227 /*******************************************************************//**
228 Rolls back a transaction back to a named savepoint. Modifications after the
229 savepoint are undone but InnoDB does NOT release the corresponding locks
230 which are stored in memory. If a lock is 'implicit', that is, a new inserted
231 row holds a lock where the lock information is carried by the trx id stored in
232 the row, these locks are naturally released in the rollback. Savepoints which
233 were set after this savepoint are deleted.
234 @return if no savepoint of the name found then DB_NO_SAVEPOINT,
235 otherwise DB_SUCCESS */
236 UNIV_INTERN
237 ulint
238 trx_rollback_to_savepoint_for_mysql(
239 /*================================*/
240 trx_t* trx, /*!< in: transaction handle */
241 const char* savepoint_name, /*!< in: savepoint name */
242 ib_int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache
243 position corresponding to this
244 savepoint; MySQL needs this
245 information to remove the
246 binlog entries of the queries
247 executed after the savepoint */
249 trx_named_savept_t* savep;
250 ulint err;
252 savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
254 while (savep != NULL) {
255 if (0 == ut_strcmp(savep->name, savepoint_name)) {
256 /* Found */
257 break;
259 savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
262 if (savep == NULL) {
264 return(DB_NO_SAVEPOINT);
267 if (trx->conc_state == TRX_NOT_STARTED) {
268 ut_print_timestamp(stderr);
269 fputs(" InnoDB: Error: transaction has a savepoint ", stderr);
270 ut_print_name(stderr, trx, FALSE, savep->name);
271 fputs(" though it is not started\n", stderr);
272 return(DB_ERROR);
275 /* We can now free all savepoints strictly later than this one */
277 trx_roll_savepoints_free(trx, savep);
279 *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos;
281 trx->op_info = "rollback to a savepoint";
283 err = trx_general_rollback_for_mysql(trx, &savep->savept);
285 /* Store the current undo_no of the transaction so that we know where
286 to roll back if we have to roll back the next SQL statement: */
288 trx_mark_sql_stat_end(trx);
290 trx->op_info = "";
292 return(err);
295 /*******************************************************************//**
296 Creates a named savepoint. If the transaction is not yet started, starts it.
297 If there is already a savepoint of the same name, this call erases that old
298 savepoint and replaces it with a new. Savepoints are deleted in a transaction
299 commit or rollback.
300 @return always DB_SUCCESS */
301 UNIV_INTERN
302 ulint
303 trx_savepoint_for_mysql(
304 /*====================*/
305 trx_t* trx, /*!< in: transaction handle */
306 const char* savepoint_name, /*!< in: savepoint name */
307 ib_int64_t binlog_cache_pos) /*!< in: MySQL binlog cache
308 position corresponding to this
309 connection at the time of the
310 savepoint */
312 trx_named_savept_t* savep;
314 ut_a(trx);
315 ut_a(savepoint_name);
317 trx_start_if_not_started(trx);
319 savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
321 while (savep != NULL) {
322 if (0 == ut_strcmp(savep->name, savepoint_name)) {
323 /* Found */
324 break;
326 savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
329 if (savep) {
330 /* There is a savepoint with the same name: free that */
332 UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
334 mem_free(savep->name);
335 mem_free(savep);
338 /* Create a new savepoint and add it as the last in the list */
340 savep = mem_alloc(sizeof(trx_named_savept_t));
342 savep->name = mem_strdup(savepoint_name);
344 savep->savept = trx_savept_take(trx);
346 savep->mysql_binlog_cache_pos = binlog_cache_pos;
348 UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, savep);
350 return(DB_SUCCESS);
353 /*******************************************************************//**
354 Releases only the named savepoint. Savepoints which were set after this
355 savepoint are left as is.
356 @return if no savepoint of the name found then DB_NO_SAVEPOINT,
357 otherwise DB_SUCCESS */
358 UNIV_INTERN
359 ulint
360 trx_release_savepoint_for_mysql(
361 /*============================*/
362 trx_t* trx, /*!< in: transaction handle */
363 const char* savepoint_name) /*!< in: savepoint name */
365 trx_named_savept_t* savep;
367 savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
369 /* Search for the savepoint by name and free if found. */
370 while (savep != NULL) {
371 if (0 == ut_strcmp(savep->name, savepoint_name)) {
372 trx_roll_savepoint_free(trx, savep);
373 return(DB_SUCCESS);
375 savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
378 return(DB_NO_SAVEPOINT);
381 /*******************************************************************//**
382 Determines if this transaction is rolling back an incomplete transaction
383 in crash recovery.
384 @return TRUE if trx is an incomplete transaction that is being rolled
385 back in crash recovery */
386 UNIV_INTERN
387 ibool
388 trx_is_recv(
389 /*========*/
390 const trx_t* trx) /*!< in: transaction */
392 return(trx == trx_roll_crash_recv_trx);
395 /*******************************************************************//**
396 Returns a transaction savepoint taken at this point in time.
397 @return savepoint */
398 UNIV_INTERN
399 trx_savept_t
400 trx_savept_take(
401 /*============*/
402 trx_t* trx) /*!< in: transaction */
404 trx_savept_t savept;
406 savept.least_undo_no = trx->undo_no;
408 return(savept);
411 /*******************************************************************//**
412 Roll back an active transaction. */
413 static
414 void
415 trx_rollback_active(
416 /*================*/
417 trx_t* trx) /*!< in/out: transaction */
419 mem_heap_t* heap;
420 que_fork_t* fork;
421 que_thr_t* thr;
422 roll_node_t* roll_node;
423 dict_table_t* table;
424 ib_int64_t rows_to_undo;
425 const char* unit = "";
426 ibool dictionary_locked = FALSE;
428 heap = mem_heap_create(512);
430 fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
431 fork->trx = trx;
433 thr = que_thr_create(fork, heap);
435 roll_node = roll_node_create(heap);
437 thr->child = roll_node;
438 roll_node->common.parent = thr;
440 mutex_enter(&kernel_mutex);
442 trx->graph = fork;
444 ut_a(thr == que_fork_start_command(fork));
446 trx_roll_crash_recv_trx = trx;
447 trx_roll_max_undo_no = ut_conv_dulint_to_longlong(trx->undo_no);
448 trx_roll_progress_printed_pct = 0;
449 rows_to_undo = trx_roll_max_undo_no;
451 if (rows_to_undo > 1000000000) {
452 rows_to_undo = rows_to_undo / 1000000;
453 unit = "M";
456 ut_print_timestamp(stderr);
457 fprintf(stderr,
458 " InnoDB: Rolling back trx with id " TRX_ID_FMT ", %lu%s"
459 " rows to undo\n",
460 TRX_ID_PREP_PRINTF(trx->id),
461 (ulong) rows_to_undo, unit);
462 mutex_exit(&kernel_mutex);
464 trx->mysql_thread_id = os_thread_get_curr_id();
466 trx->mysql_process_no = os_proc_get_number();
468 if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
469 row_mysql_lock_data_dictionary(trx);
470 dictionary_locked = TRUE;
473 que_run_threads(thr);
475 mutex_enter(&kernel_mutex);
477 while (trx->que_state != TRX_QUE_RUNNING) {
479 mutex_exit(&kernel_mutex);
481 fprintf(stderr,
482 "InnoDB: Waiting for rollback of trx id %lu to end\n",
483 (ulong) ut_dulint_get_low(trx->id));
484 os_thread_sleep(100000);
486 mutex_enter(&kernel_mutex);
489 mutex_exit(&kernel_mutex);
491 if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
492 && !ut_dulint_is_zero(trx->table_id)) {
494 /* If the transaction was for a dictionary operation, we
495 drop the relevant table, if it still exists */
497 fprintf(stderr,
498 "InnoDB: Dropping table with id %lu %lu"
499 " in recovery if it exists\n",
500 (ulong) ut_dulint_get_high(trx->table_id),
501 (ulong) ut_dulint_get_low(trx->table_id));
503 table = dict_table_get_on_id_low(trx->table_id);
505 if (table) {
506 ulint err;
508 fputs("InnoDB: Table found: dropping table ", stderr);
509 ut_print_name(stderr, trx, TRUE, table->name);
510 fputs(" in recovery\n", stderr);
512 err = row_drop_table_for_mysql(table->name, trx, TRUE);
513 trx_commit_for_mysql(trx);
515 ut_a(err == (int) DB_SUCCESS);
519 if (dictionary_locked) {
520 row_mysql_unlock_data_dictionary(trx);
523 fprintf(stderr, "\nInnoDB: Rolling back of trx id " TRX_ID_FMT
524 " completed\n",
525 TRX_ID_PREP_PRINTF(trx->id));
526 mem_heap_free(heap);
528 trx_roll_crash_recv_trx = NULL;
531 /*******************************************************************//**
532 Rollback or clean up any incomplete transactions which were
533 encountered in crash recovery. If the transaction already was
534 committed, then we clean up a possible insert undo log. If the
535 transaction was not yet committed, then we roll it back. */
536 UNIV_INTERN
537 void
538 trx_rollback_or_clean_recovered(
539 /*============================*/
540 ibool all) /*!< in: FALSE=roll back dictionary transactions;
541 TRUE=roll back all non-PREPARED transactions */
543 trx_t* trx;
545 mutex_enter(&kernel_mutex);
547 if (!UT_LIST_GET_FIRST(trx_sys->trx_list)) {
548 goto leave_function;
551 if (all) {
552 fprintf(stderr,
553 "InnoDB: Starting in background the rollback"
554 " of uncommitted transactions\n");
557 mutex_exit(&kernel_mutex);
559 loop:
560 mutex_enter(&kernel_mutex);
562 for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list); trx;
563 trx = UT_LIST_GET_NEXT(trx_list, trx)) {
564 if (!trx->is_recovered) {
565 continue;
568 switch (trx->conc_state) {
569 case TRX_NOT_STARTED:
570 case TRX_PREPARED:
571 continue;
573 case TRX_COMMITTED_IN_MEMORY:
574 mutex_exit(&kernel_mutex);
575 fprintf(stderr,
576 "InnoDB: Cleaning up trx with id "
577 TRX_ID_FMT "\n",
578 TRX_ID_PREP_PRINTF(trx->id));
579 trx_cleanup_at_db_startup(trx);
580 goto loop;
582 case TRX_ACTIVE:
583 if (all || trx_get_dict_operation(trx)
584 != TRX_DICT_OP_NONE) {
585 mutex_exit(&kernel_mutex);
586 trx_rollback_active(trx);
587 goto loop;
592 if (all) {
593 ut_print_timestamp(stderr);
594 fprintf(stderr,
595 " InnoDB: Rollback of non-prepared"
596 " transactions completed\n");
599 leave_function:
600 mutex_exit(&kernel_mutex);
603 /*******************************************************************//**
604 Rollback or clean up any incomplete transactions which were
605 encountered in crash recovery. If the transaction already was
606 committed, then we clean up a possible insert undo log. If the
607 transaction was not yet committed, then we roll it back.
608 Note: this is done in a background thread.
609 @return a dummy parameter */
610 UNIV_INTERN
611 os_thread_ret_t
612 trx_rollback_or_clean_all_recovered(
613 /*================================*/
614 void* arg __attribute__((unused)))
615 /*!< in: a dummy parameter required by
616 os_thread_create */
618 trx_rollback_or_clean_recovered(TRUE);
620 /* We count the number of threads in os_thread_exit(). A created
621 thread should always use that to exit and not use return() to exit. */
623 os_thread_exit(NULL);
625 OS_THREAD_DUMMY_RETURN;
628 /*******************************************************************//**
629 Creates an undo number array.
630 @return own: undo number array */
631 UNIV_INTERN
632 trx_undo_arr_t*
633 trx_undo_arr_create(void)
634 /*=====================*/
636 trx_undo_arr_t* arr;
637 mem_heap_t* heap;
638 ulint i;
640 heap = mem_heap_create(1024);
642 arr = mem_heap_alloc(heap, sizeof(trx_undo_arr_t));
644 arr->infos = mem_heap_alloc(heap, sizeof(trx_undo_inf_t)
645 * UNIV_MAX_PARALLELISM);
646 arr->n_cells = UNIV_MAX_PARALLELISM;
647 arr->n_used = 0;
649 arr->heap = heap;
651 for (i = 0; i < UNIV_MAX_PARALLELISM; i++) {
653 (trx_undo_arr_get_nth_info(arr, i))->in_use = FALSE;
656 return(arr);
659 /*******************************************************************//**
660 Frees an undo number array. */
661 UNIV_INTERN
662 void
663 trx_undo_arr_free(
664 /*==============*/
665 trx_undo_arr_t* arr) /*!< in: undo number array */
667 ut_ad(arr->n_used == 0);
669 mem_heap_free(arr->heap);
672 /*******************************************************************//**
673 Stores info of an undo log record to the array if it is not stored yet.
674 @return FALSE if the record already existed in the array */
675 static
676 ibool
677 trx_undo_arr_store_info(
678 /*====================*/
679 trx_t* trx, /*!< in: transaction */
680 undo_no_t undo_no)/*!< in: undo number */
682 trx_undo_inf_t* cell;
683 trx_undo_inf_t* stored_here;
684 trx_undo_arr_t* arr;
685 ulint n_used;
686 ulint n;
687 ulint i;
689 n = 0;
690 arr = trx->undo_no_arr;
691 n_used = arr->n_used;
692 stored_here = NULL;
694 for (i = 0;; i++) {
695 cell = trx_undo_arr_get_nth_info(arr, i);
697 if (!cell->in_use) {
698 if (!stored_here) {
699 /* Not in use, we may store here */
700 cell->undo_no = undo_no;
701 cell->in_use = TRUE;
703 arr->n_used++;
705 stored_here = cell;
707 } else {
708 n++;
710 if (0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
712 if (stored_here) {
713 stored_here->in_use = FALSE;
714 ut_ad(arr->n_used > 0);
715 arr->n_used--;
718 ut_ad(arr->n_used == n_used);
720 return(FALSE);
724 if (n == n_used && stored_here) {
726 ut_ad(arr->n_used == 1 + n_used);
728 return(TRUE);
733 /*******************************************************************//**
734 Removes an undo number from the array. */
735 static
736 void
737 trx_undo_arr_remove_info(
738 /*=====================*/
739 trx_undo_arr_t* arr, /*!< in: undo number array */
740 undo_no_t undo_no)/*!< in: undo number */
742 trx_undo_inf_t* cell;
743 ulint i;
745 for (i = 0;; i++) {
746 cell = trx_undo_arr_get_nth_info(arr, i);
748 if (cell->in_use
749 && 0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
751 cell->in_use = FALSE;
753 ut_ad(arr->n_used > 0);
755 arr->n_used--;
757 return;
762 /*******************************************************************//**
763 Gets the biggest undo number in an array.
764 @return biggest value, ut_dulint_zero if the array is empty */
765 static
766 undo_no_t
767 trx_undo_arr_get_biggest(
768 /*=====================*/
769 trx_undo_arr_t* arr) /*!< in: undo number array */
771 trx_undo_inf_t* cell;
772 ulint n_used;
773 undo_no_t biggest;
774 ulint n;
775 ulint i;
777 n = 0;
778 n_used = arr->n_used;
779 biggest = ut_dulint_zero;
781 for (i = 0;; i++) {
782 cell = trx_undo_arr_get_nth_info(arr, i);
784 if (cell->in_use) {
785 n++;
786 if (ut_dulint_cmp(cell->undo_no, biggest) > 0) {
788 biggest = cell->undo_no;
792 if (n == n_used) {
793 return(biggest);
798 /***********************************************************************//**
799 Tries truncate the undo logs. */
800 UNIV_INTERN
801 void
802 trx_roll_try_truncate(
803 /*==================*/
804 trx_t* trx) /*!< in/out: transaction */
806 trx_undo_arr_t* arr;
807 undo_no_t limit;
808 undo_no_t biggest;
810 ut_ad(mutex_own(&(trx->undo_mutex)));
811 ut_ad(mutex_own(&((trx->rseg)->mutex)));
813 trx->pages_undone = 0;
815 arr = trx->undo_no_arr;
817 limit = trx->undo_no;
819 if (arr->n_used > 0) {
820 biggest = trx_undo_arr_get_biggest(arr);
822 if (ut_dulint_cmp(biggest, limit) >= 0) {
824 limit = ut_dulint_add(biggest, 1);
828 if (trx->insert_undo) {
829 trx_undo_truncate_end(trx, trx->insert_undo, limit);
832 if (trx->update_undo) {
833 trx_undo_truncate_end(trx, trx->update_undo, limit);
837 /***********************************************************************//**
838 Pops the topmost undo log record in a single undo log and updates the info
839 about the topmost record in the undo log memory struct.
840 @return undo log record, the page s-latched */
841 static
842 trx_undo_rec_t*
843 trx_roll_pop_top_rec(
844 /*=================*/
845 trx_t* trx, /*!< in: transaction */
846 trx_undo_t* undo, /*!< in: undo log */
847 mtr_t* mtr) /*!< in: mtr */
849 page_t* undo_page;
850 ulint offset;
851 trx_undo_rec_t* prev_rec;
852 page_t* prev_rec_page;
854 ut_ad(mutex_own(&(trx->undo_mutex)));
856 undo_page = trx_undo_page_get_s_latched(undo->space, undo->zip_size,
857 undo->top_page_no, mtr);
858 offset = undo->top_offset;
860 /* fprintf(stderr, "Thread %lu undoing trx %lu undo record %lu\n",
861 os_thread_get_curr_id(), ut_dulint_get_low(trx->id),
862 ut_dulint_get_low(undo->top_undo_no)); */
864 prev_rec = trx_undo_get_prev_rec(undo_page + offset,
865 undo->hdr_page_no, undo->hdr_offset,
866 mtr);
867 if (prev_rec == NULL) {
869 undo->empty = TRUE;
870 } else {
871 prev_rec_page = page_align(prev_rec);
873 if (prev_rec_page != undo_page) {
875 trx->pages_undone++;
878 undo->top_page_no = page_get_page_no(prev_rec_page);
879 undo->top_offset = prev_rec - prev_rec_page;
880 undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
883 return(undo_page + offset);
886 /********************************************************************//**
887 Pops the topmost record when the two undo logs of a transaction are seen
888 as a single stack of records ordered by their undo numbers. Inserts the
889 undo number of the popped undo record to the array of currently processed
890 undo numbers in the transaction. When the query thread finishes processing
891 of this undo record, it must be released with trx_undo_rec_release.
892 @return undo log record copied to heap, NULL if none left, or if the
893 undo number of the top record would be less than the limit */
894 UNIV_INTERN
895 trx_undo_rec_t*
896 trx_roll_pop_top_rec_of_trx(
897 /*========================*/
898 trx_t* trx, /*!< in: transaction */
899 undo_no_t limit, /*!< in: least undo number we need */
900 roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */
901 mem_heap_t* heap) /*!< in: memory heap where copied */
903 trx_undo_t* undo;
904 trx_undo_t* ins_undo;
905 trx_undo_t* upd_undo;
906 trx_undo_rec_t* undo_rec;
907 trx_undo_rec_t* undo_rec_copy;
908 undo_no_t undo_no;
909 ibool is_insert;
910 trx_rseg_t* rseg;
911 ulint progress_pct;
912 mtr_t mtr;
914 rseg = trx->rseg;
915 try_again:
916 mutex_enter(&(trx->undo_mutex));
918 if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
919 mutex_enter(&(rseg->mutex));
921 trx_roll_try_truncate(trx);
923 mutex_exit(&(rseg->mutex));
926 ins_undo = trx->insert_undo;
927 upd_undo = trx->update_undo;
929 if (!ins_undo || ins_undo->empty) {
930 undo = upd_undo;
931 } else if (!upd_undo || upd_undo->empty) {
932 undo = ins_undo;
933 } else if (ut_dulint_cmp(upd_undo->top_undo_no,
934 ins_undo->top_undo_no) > 0) {
935 undo = upd_undo;
936 } else {
937 undo = ins_undo;
940 if (!undo || undo->empty
941 || (ut_dulint_cmp(limit, undo->top_undo_no) > 0)) {
943 if ((trx->undo_no_arr)->n_used == 0) {
944 /* Rollback is ending */
946 mutex_enter(&(rseg->mutex));
948 trx_roll_try_truncate(trx);
950 mutex_exit(&(rseg->mutex));
953 mutex_exit(&(trx->undo_mutex));
955 return(NULL);
958 if (undo == ins_undo) {
959 is_insert = TRUE;
960 } else {
961 is_insert = FALSE;
964 *roll_ptr = trx_undo_build_roll_ptr(is_insert, (undo->rseg)->id,
965 undo->top_page_no,
966 undo->top_offset);
967 mtr_start(&mtr);
969 undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
971 undo_no = trx_undo_rec_get_undo_no(undo_rec);
973 ut_ad(ut_dulint_cmp(ut_dulint_add(undo_no, 1), trx->undo_no) == 0);
975 /* We print rollback progress info if we are in a crash recovery
976 and the transaction has at least 1000 row operations to undo. */
978 if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) {
980 progress_pct = 100 - (ulint)
981 ((ut_conv_dulint_to_longlong(undo_no) * 100)
982 / trx_roll_max_undo_no);
983 if (progress_pct != trx_roll_progress_printed_pct) {
984 if (trx_roll_progress_printed_pct == 0) {
985 fprintf(stderr,
986 "\nInnoDB: Progress in percents:"
987 " %lu", (ulong) progress_pct);
988 } else {
989 fprintf(stderr,
990 " %lu", (ulong) progress_pct);
992 fflush(stderr);
993 trx_roll_progress_printed_pct = progress_pct;
997 trx->undo_no = undo_no;
999 if (!trx_undo_arr_store_info(trx, undo_no)) {
1000 /* A query thread is already processing this undo log record */
1002 mutex_exit(&(trx->undo_mutex));
1004 mtr_commit(&mtr);
1006 goto try_again;
1009 undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
1011 mutex_exit(&(trx->undo_mutex));
1013 mtr_commit(&mtr);
1015 return(undo_rec_copy);
1018 /********************************************************************//**
1019 Reserves an undo log record for a query thread to undo. This should be
1020 called if the query thread gets the undo log record not using the pop
1021 function above.
1022 @return TRUE if succeeded */
1023 UNIV_INTERN
1024 ibool
1025 trx_undo_rec_reserve(
1026 /*=================*/
1027 trx_t* trx, /*!< in/out: transaction */
1028 undo_no_t undo_no)/*!< in: undo number of the record */
1030 ibool ret;
1032 mutex_enter(&(trx->undo_mutex));
1034 ret = trx_undo_arr_store_info(trx, undo_no);
1036 mutex_exit(&(trx->undo_mutex));
1038 return(ret);
1041 /*******************************************************************//**
1042 Releases a reserved undo record. */
1043 UNIV_INTERN
1044 void
1045 trx_undo_rec_release(
1046 /*=================*/
1047 trx_t* trx, /*!< in/out: transaction */
1048 undo_no_t undo_no)/*!< in: undo number */
1050 trx_undo_arr_t* arr;
1052 mutex_enter(&(trx->undo_mutex));
1054 arr = trx->undo_no_arr;
1056 trx_undo_arr_remove_info(arr, undo_no);
1058 mutex_exit(&(trx->undo_mutex));
1061 /*********************************************************************//**
1062 Starts a rollback operation. */
1063 UNIV_INTERN
1064 void
1065 trx_rollback(
1066 /*=========*/
1067 trx_t* trx, /*!< in: transaction */
1068 trx_sig_t* sig, /*!< in: signal starting the rollback */
1069 que_thr_t** next_thr)/*!< in/out: next query thread to run;
1070 if the value which is passed in is
1071 a pointer to a NULL pointer, then the
1072 calling function can start running
1073 a new query thread; if the passed value is
1074 NULL, the parameter is ignored */
1076 que_t* roll_graph;
1077 que_thr_t* thr;
1078 /* que_thr_t* thr2; */
1080 ut_ad(mutex_own(&kernel_mutex));
1081 ut_ad((trx->undo_no_arr == NULL) || ((trx->undo_no_arr)->n_used == 0));
1083 /* Initialize the rollback field in the transaction */
1085 if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
1087 trx->roll_limit = ut_dulint_zero;
1089 } else if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
1091 trx->roll_limit = (sig->savept).least_undo_no;
1093 } else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
1095 trx->roll_limit = trx->last_sql_stat_start.least_undo_no;
1096 } else {
1097 ut_error;
1100 ut_a(ut_dulint_cmp(trx->roll_limit, trx->undo_no) <= 0);
1102 trx->pages_undone = 0;
1104 if (trx->undo_no_arr == NULL) {
1105 trx->undo_no_arr = trx_undo_arr_create();
1108 /* Build a 'query' graph which will perform the undo operations */
1110 roll_graph = trx_roll_graph_build(trx);
1112 trx->graph = roll_graph;
1113 trx->que_state = TRX_QUE_ROLLING_BACK;
1115 thr = que_fork_start_command(roll_graph);
1117 ut_ad(thr);
1119 /* thr2 = que_fork_start_command(roll_graph);
1121 ut_ad(thr2); */
1123 if (next_thr && (*next_thr == NULL)) {
1124 *next_thr = thr;
1125 /* srv_que_task_enqueue_low(thr2); */
1126 } else {
1127 srv_que_task_enqueue_low(thr);
1128 /* srv_que_task_enqueue_low(thr2); */
1132 /****************************************************************//**
1133 Builds an undo 'query' graph for a transaction. The actual rollback is
1134 performed by executing this query graph like a query subprocedure call.
1135 The reply about the completion of the rollback will be sent by this
1136 graph.
1137 @return own: the query graph */
1138 UNIV_INTERN
1139 que_t*
1140 trx_roll_graph_build(
1141 /*=================*/
1142 trx_t* trx) /*!< in: trx handle */
1144 mem_heap_t* heap;
1145 que_fork_t* fork;
1146 que_thr_t* thr;
1147 /* que_thr_t* thr2; */
1149 ut_ad(mutex_own(&kernel_mutex));
1151 heap = mem_heap_create(512);
1152 fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap);
1153 fork->trx = trx;
1155 thr = que_thr_create(fork, heap);
1156 /* thr2 = que_thr_create(fork, heap); */
1158 thr->child = row_undo_node_create(trx, thr, heap);
1159 /* thr2->child = row_undo_node_create(trx, thr2, heap); */
1161 return(fork);
1164 /*********************************************************************//**
1165 Finishes error processing after the necessary partial rollback has been
1166 done. */
1167 static
1168 void
1169 trx_finish_error_processing(
1170 /*========================*/
1171 trx_t* trx) /*!< in: transaction */
1173 trx_sig_t* sig;
1174 trx_sig_t* next_sig;
1176 ut_ad(mutex_own(&kernel_mutex));
1178 sig = UT_LIST_GET_FIRST(trx->signals);
1180 while (sig != NULL) {
1181 next_sig = UT_LIST_GET_NEXT(signals, sig);
1183 if (sig->type == TRX_SIG_ERROR_OCCURRED) {
1185 trx_sig_remove(trx, sig);
1188 sig = next_sig;
1191 trx->que_state = TRX_QUE_RUNNING;
1194 /*********************************************************************//**
1195 Finishes a partial rollback operation. */
1196 static
1197 void
1198 trx_finish_partial_rollback_off_kernel(
1199 /*===================================*/
1200 trx_t* trx, /*!< in: transaction */
1201 que_thr_t** next_thr)/*!< in/out: next query thread to run;
1202 if the value which is passed in is a pointer
1203 to a NULL pointer, then the calling function
1204 can start running a new query thread; if this
1205 parameter is NULL, it is ignored */
1207 trx_sig_t* sig;
1209 ut_ad(mutex_own(&kernel_mutex));
1211 sig = UT_LIST_GET_FIRST(trx->signals);
1213 /* Remove the signal from the signal queue and send reply message
1214 to it */
1216 trx_sig_reply(sig, next_thr);
1217 trx_sig_remove(trx, sig);
1219 trx->que_state = TRX_QUE_RUNNING;
1222 /****************************************************************//**
1223 Finishes a transaction rollback. */
1224 UNIV_INTERN
1225 void
1226 trx_finish_rollback_off_kernel(
1227 /*===========================*/
1228 que_t* graph, /*!< in: undo graph which can now be freed */
1229 trx_t* trx, /*!< in: transaction */
1230 que_thr_t** next_thr)/*!< in/out: next query thread to run;
1231 if the value which is passed in is
1232 a pointer to a NULL pointer, then the
1233 calling function can start running
1234 a new query thread; if this parameter is
1235 NULL, it is ignored */
1237 trx_sig_t* sig;
1238 trx_sig_t* next_sig;
1240 ut_ad(mutex_own(&kernel_mutex));
1242 ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
1244 /* Free the memory reserved by the undo graph */
1245 que_graph_free(graph);
1247 sig = UT_LIST_GET_FIRST(trx->signals);
1249 if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
1251 trx_finish_partial_rollback_off_kernel(trx, next_thr);
1253 return;
1255 } else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
1257 trx_finish_error_processing(trx);
1259 return;
1262 #ifdef UNIV_DEBUG
1263 if (lock_print_waits) {
1264 fprintf(stderr, "Trx %lu rollback finished\n",
1265 (ulong) ut_dulint_get_low(trx->id));
1267 #endif /* UNIV_DEBUG */
1269 trx_commit_off_kernel(trx);
1271 /* Remove all TRX_SIG_TOTAL_ROLLBACK signals from the signal queue and
1272 send reply messages to them */
1274 trx->que_state = TRX_QUE_RUNNING;
1276 while (sig != NULL) {
1277 next_sig = UT_LIST_GET_NEXT(signals, sig);
1279 if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
1281 trx_sig_reply(sig, next_thr);
1283 trx_sig_remove(trx, sig);
1286 sig = next_sig;
1290 /*********************************************************************//**
1291 Creates a rollback command node struct.
1292 @return own: rollback node struct */
1293 UNIV_INTERN
1294 roll_node_t*
1295 roll_node_create(
1296 /*=============*/
1297 mem_heap_t* heap) /*!< in: mem heap where created */
1299 roll_node_t* node;
1301 node = mem_heap_alloc(heap, sizeof(roll_node_t));
1302 node->common.type = QUE_NODE_ROLLBACK;
1303 node->state = ROLL_NODE_SEND;
1305 node->partial = FALSE;
1307 return(node);
1310 /***********************************************************//**
1311 Performs an execution step for a rollback command node in a query graph.
1312 @return query thread to run next, or NULL */
1313 UNIV_INTERN
1314 que_thr_t*
1315 trx_rollback_step(
1316 /*==============*/
1317 que_thr_t* thr) /*!< in: query thread */
1319 roll_node_t* node;
1320 ulint sig_no;
1321 trx_savept_t* savept;
1323 node = thr->run_node;
1325 ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK);
1327 if (thr->prev_node == que_node_get_parent(node)) {
1328 node->state = ROLL_NODE_SEND;
1331 if (node->state == ROLL_NODE_SEND) {
1332 mutex_enter(&kernel_mutex);
1334 node->state = ROLL_NODE_WAIT;
1336 if (node->partial) {
1337 sig_no = TRX_SIG_ROLLBACK_TO_SAVEPT;
1338 savept = &(node->savept);
1339 } else {
1340 sig_no = TRX_SIG_TOTAL_ROLLBACK;
1341 savept = NULL;
1344 /* Send a rollback signal to the transaction */
1346 trx_sig_send(thr_get_trx(thr), sig_no, TRX_SIG_SELF, thr,
1347 savept, NULL);
1349 thr->state = QUE_THR_SIG_REPLY_WAIT;
1351 mutex_exit(&kernel_mutex);
1353 return(NULL);
1356 ut_ad(node->state == ROLL_NODE_WAIT);
1358 thr->run_node = que_node_get_parent(node);
1360 return(thr);