mySQL 5.0.11 sources for tomato
[tomato.git] / release / src / router / mysql / storage / innobase / trx / trx0roll.c
blob285e30796a5c7dfbb0c4e8f67e46fff62a79d59e
/******************************************************
Transaction rollback

(c) 1996 Innobase Oy

Created 3/26/1996 Heikki Tuuri
*******************************************************/
9 #include "trx0roll.h"
11 #ifdef UNIV_NONINL
12 #include "trx0roll.ic"
13 #endif
15 #include "fsp0fsp.h"
16 #include "mach0data.h"
17 #include "trx0rseg.h"
18 #include "trx0trx.h"
19 #include "trx0undo.h"
20 #include "trx0rec.h"
21 #include "que0que.h"
22 #include "usr0sess.h"
23 #include "srv0que.h"
24 #include "srv0start.h"
25 #include "row0undo.h"
26 #include "row0mysql.h"
27 #include "lock0lock.h"
28 #include "pars0pars.h"
30 /* This many pages must be undone before a truncate is tried within rollback */
31 #define TRX_ROLL_TRUNC_THRESHOLD 1
33 /* In crash recovery, the current trx to be rolled back */
34 trx_t* trx_roll_crash_recv_trx = NULL;
36 /* In crash recovery we set this to the undo n:o of the current trx to be
37 rolled back. Then we can print how many % the rollback has progressed. */
38 ib_longlong trx_roll_max_undo_no;
40 /* Auxiliary variable which tells the previous progress % we printed */
41 ulint trx_roll_progress_printed_pct;
43 /***********************************************************************
44 Rollback a transaction used in MySQL. */
46 int
47 trx_general_rollback_for_mysql(
48 /*===========================*/
49 /* out: error code or DB_SUCCESS */
50 trx_t* trx, /* in: transaction handle */
51 ibool partial,/* in: TRUE if partial rollback requested */
52 trx_savept_t* savept) /* in: pointer to savepoint undo number, if
53 partial rollback requested */
55 #ifndef UNIV_HOTBACKUP
56 mem_heap_t* heap;
57 que_thr_t* thr;
58 roll_node_t* roll_node;
60 /* Tell Innobase server that there might be work for
61 utility threads: */
63 srv_active_wake_master_thread();
65 trx_start_if_not_started(trx);
67 heap = mem_heap_create(512);
69 roll_node = roll_node_create(heap);
71 roll_node->partial = partial;
73 if (partial) {
74 roll_node->savept = *savept;
77 trx->error_state = DB_SUCCESS;
79 thr = pars_complete_graph_for_exec(roll_node, trx, heap);
81 ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
82 que_run_threads(thr);
84 mutex_enter(&kernel_mutex);
86 while (trx->que_state != TRX_QUE_RUNNING) {
88 mutex_exit(&kernel_mutex);
90 os_thread_sleep(100000);
92 mutex_enter(&kernel_mutex);
95 mutex_exit(&kernel_mutex);
97 mem_heap_free(heap);
99 ut_a(trx->error_state == DB_SUCCESS);
101 /* Tell Innobase server that there might be work for
102 utility threads: */
104 srv_active_wake_master_thread();
106 return((int) trx->error_state);
107 #else /* UNIV_HOTBACKUP */
108 /* This function depends on MySQL code that is not included in
109 InnoDB Hot Backup builds. Besides, this function should never
110 be called in InnoDB Hot Backup. */
111 ut_error;
112 return(DB_FAIL);
113 #endif /* UNIV_HOTBACKUP */
116 /***********************************************************************
117 Rollback a transaction used in MySQL. */
120 trx_rollback_for_mysql(
121 /*===================*/
122 /* out: error code or DB_SUCCESS */
123 trx_t* trx) /* in: transaction handle */
125 int err;
127 if (trx->conc_state == TRX_NOT_STARTED) {
129 return(DB_SUCCESS);
132 trx->op_info = "rollback";
134 /* If we are doing the XA recovery of prepared transactions, then
135 the transaction object does not have an InnoDB session object, and we
136 set a dummy session that we use for all MySQL transactions. */
138 mutex_enter(&kernel_mutex);
140 if (trx->sess == NULL) {
141 /* Open a dummy session */
143 if (!trx_dummy_sess) {
144 trx_dummy_sess = sess_open();
147 trx->sess = trx_dummy_sess;
150 mutex_exit(&kernel_mutex);
152 err = trx_general_rollback_for_mysql(trx, FALSE, NULL);
154 trx->op_info = "";
156 return(err);
159 /***********************************************************************
160 Rollback the latest SQL statement for MySQL. */
163 trx_rollback_last_sql_stat_for_mysql(
164 /*=================================*/
165 /* out: error code or DB_SUCCESS */
166 trx_t* trx) /* in: transaction handle */
168 int err;
170 if (trx->conc_state == TRX_NOT_STARTED) {
172 return(DB_SUCCESS);
175 trx->op_info = "rollback of SQL statement";
177 err = trx_general_rollback_for_mysql(trx, TRUE,
178 &(trx->last_sql_stat_start));
179 /* The following call should not be needed, but we play safe: */
180 trx_mark_sql_stat_end(trx);
182 trx->op_info = "";
184 return(err);
187 /***********************************************************************
188 Frees a single savepoint struct. */
190 void
191 trx_roll_savepoint_free(
192 /*=====================*/
193 trx_t* trx, /* in: transaction handle */
194 trx_named_savept_t* savep) /* in: savepoint to free */
196 ut_a(savep != NULL);
197 ut_a(UT_LIST_GET_LEN(trx->trx_savepoints) > 0);
199 UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
200 mem_free(savep->name);
201 mem_free(savep);
204 /***********************************************************************
205 Frees savepoint structs starting from savep, if savep == NULL then
206 free all savepoints. */
208 void
209 trx_roll_savepoints_free(
210 /*=====================*/
211 trx_t* trx, /* in: transaction handle */
212 trx_named_savept_t* savep) /* in: free all savepoints > this one;
213 if this is NULL, free all savepoints
214 of trx */
216 trx_named_savept_t* next_savep;
218 if (savep == NULL) {
219 savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
220 } else {
221 savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
224 while (savep != NULL) {
225 next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
227 trx_roll_savepoint_free(trx, savep);
229 savep = next_savep;
233 /***********************************************************************
234 Rolls back a transaction back to a named savepoint. Modifications after the
235 savepoint are undone but InnoDB does NOT release the corresponding locks
236 which are stored in memory. If a lock is 'implicit', that is, a new inserted
237 row holds a lock where the lock information is carried by the trx id stored in
238 the row, these locks are naturally released in the rollback. Savepoints which
239 were set after this savepoint are deleted. */
241 ulint
242 trx_rollback_to_savepoint_for_mysql(
243 /*================================*/
244 /* out: if no savepoint
245 of the name found then
246 DB_NO_SAVEPOINT,
247 otherwise DB_SUCCESS */
248 trx_t* trx, /* in: transaction handle */
249 const char* savepoint_name, /* in: savepoint name */
250 ib_longlong* mysql_binlog_cache_pos) /* out: the MySQL binlog cache
251 position corresponding to this
252 savepoint; MySQL needs this
253 information to remove the
254 binlog entries of the queries
255 executed after the savepoint */
257 trx_named_savept_t* savep;
258 ulint err;
260 savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
262 while (savep != NULL) {
263 if (0 == ut_strcmp(savep->name, savepoint_name)) {
264 /* Found */
265 break;
267 savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
270 if (savep == NULL) {
272 return(DB_NO_SAVEPOINT);
275 if (trx->conc_state == TRX_NOT_STARTED) {
276 ut_print_timestamp(stderr);
277 fputs(" InnoDB: Error: transaction has a savepoint ", stderr);
278 ut_print_name(stderr, trx, FALSE, savep->name);
279 fputs(" though it is not started\n", stderr);
280 return(DB_ERROR);
283 /* We can now free all savepoints strictly later than this one */
285 trx_roll_savepoints_free(trx, savep);
287 *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos;
289 trx->op_info = "rollback to a savepoint";
291 err = trx_general_rollback_for_mysql(trx, TRUE, &(savep->savept));
293 /* Store the current undo_no of the transaction so that we know where
294 to roll back if we have to roll back the next SQL statement: */
296 trx_mark_sql_stat_end(trx);
298 trx->op_info = "";
300 return(err);
303 /***********************************************************************
304 Creates a named savepoint. If the transaction is not yet started, starts it.
305 If there is already a savepoint of the same name, this call erases that old
306 savepoint and replaces it with a new. Savepoints are deleted in a transaction
307 commit or rollback. */
309 ulint
310 trx_savepoint_for_mysql(
311 /*====================*/
312 /* out: always DB_SUCCESS */
313 trx_t* trx, /* in: transaction handle */
314 const char* savepoint_name, /* in: savepoint name */
315 ib_longlong binlog_cache_pos) /* in: MySQL binlog cache
316 position corresponding to this
317 connection at the time of the
318 savepoint */
320 trx_named_savept_t* savep;
322 ut_a(trx);
323 ut_a(savepoint_name);
325 trx_start_if_not_started(trx);
327 savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
329 while (savep != NULL) {
330 if (0 == ut_strcmp(savep->name, savepoint_name)) {
331 /* Found */
332 break;
334 savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
337 if (savep) {
338 /* There is a savepoint with the same name: free that */
340 UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
342 mem_free(savep->name);
343 mem_free(savep);
346 /* Create a new savepoint and add it as the last in the list */
348 savep = mem_alloc(sizeof(trx_named_savept_t));
350 savep->name = mem_strdup(savepoint_name);
352 savep->savept = trx_savept_take(trx);
354 savep->mysql_binlog_cache_pos = binlog_cache_pos;
356 UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, savep);
358 return(DB_SUCCESS);
361 /***********************************************************************
362 Releases only the named savepoint. Savepoints which were set after this
363 savepoint are left as is. */
365 ulint
366 trx_release_savepoint_for_mysql(
367 /*============================*/
368 /* out: if no savepoint
369 of the name found then
370 DB_NO_SAVEPOINT,
371 otherwise DB_SUCCESS */
372 trx_t* trx, /* in: transaction handle */
373 const char* savepoint_name) /* in: savepoint name */
375 trx_named_savept_t* savep;
377 savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
379 /* Search for the savepoint by name and free if found. */
380 while (savep != NULL) {
381 if (0 == ut_strcmp(savep->name, savepoint_name)) {
382 trx_roll_savepoint_free(trx, savep);
383 return(DB_SUCCESS);
385 savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
388 return(DB_NO_SAVEPOINT);
391 /***********************************************************************
392 Returns a transaction savepoint taken at this point in time. */
394 trx_savept_t
395 trx_savept_take(
396 /*============*/
397 /* out: savepoint */
398 trx_t* trx) /* in: transaction */
400 trx_savept_t savept;
402 savept.least_undo_no = trx->undo_no;
404 return(savept);
407 /***********************************************************************
408 Rollback or clean up transactions which have no user session. If the
409 transaction already was committed, then we clean up a possible insert
410 undo log. If the transaction was not yet committed, then we roll it back.
411 Note: this is done in a background thread. */
413 os_thread_ret_t
414 trx_rollback_or_clean_all_without_sess(
415 /*===================================*/
416 /* out: a dummy parameter */
417 void* arg __attribute__((unused)))
418 /* in: a dummy parameter required by
419 os_thread_create */
421 mem_heap_t* heap;
422 que_fork_t* fork;
423 que_thr_t* thr;
424 roll_node_t* roll_node;
425 trx_t* trx;
426 dict_table_t* table;
427 ib_longlong rows_to_undo;
428 const char* unit = "";
429 int err;
431 mutex_enter(&kernel_mutex);
433 /* Open a dummy session */
435 if (!trx_dummy_sess) {
436 trx_dummy_sess = sess_open();
439 mutex_exit(&kernel_mutex);
441 if (UT_LIST_GET_FIRST(trx_sys->trx_list)) {
443 fprintf(stderr,
444 "InnoDB: Starting in background the rollback"
445 " of uncommitted transactions\n");
446 } else {
447 goto leave_function;
449 loop:
450 heap = mem_heap_create(512);
452 mutex_enter(&kernel_mutex);
454 trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
456 while (trx) {
457 if ((trx->sess || (trx->conc_state == TRX_NOT_STARTED))) {
458 trx = UT_LIST_GET_NEXT(trx_list, trx);
459 } else if (trx->conc_state == TRX_PREPARED) {
461 trx->sess = trx_dummy_sess;
462 trx = UT_LIST_GET_NEXT(trx_list, trx);
463 } else {
464 break;
468 mutex_exit(&kernel_mutex);
470 if (trx == NULL) {
471 ut_print_timestamp(stderr);
472 fprintf(stderr,
473 " InnoDB: Rollback of non-prepared transactions"
474 " completed\n");
476 mem_heap_free(heap);
478 goto leave_function;
481 trx->sess = trx_dummy_sess;
483 if (trx->conc_state == TRX_COMMITTED_IN_MEMORY) {
484 fprintf(stderr, "InnoDB: Cleaning up trx with id %lu %lu\n",
485 (ulong) ut_dulint_get_high(trx->id),
486 (ulong) ut_dulint_get_low(trx->id));
488 trx_cleanup_at_db_startup(trx);
490 mem_heap_free(heap);
492 goto loop;
495 fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
496 fork->trx = trx;
498 thr = que_thr_create(fork, heap);
500 roll_node = roll_node_create(heap);
502 thr->child = roll_node;
503 roll_node->common.parent = thr;
505 mutex_enter(&kernel_mutex);
507 trx->graph = fork;
509 ut_a(thr == que_fork_start_command(fork));
511 trx_roll_crash_recv_trx = trx;
512 trx_roll_max_undo_no = ut_conv_dulint_to_longlong(trx->undo_no);
513 trx_roll_progress_printed_pct = 0;
514 rows_to_undo = trx_roll_max_undo_no;
516 if (rows_to_undo > 1000000000) {
517 rows_to_undo = rows_to_undo / 1000000;
518 unit = "M";
521 ut_print_timestamp(stderr);
522 fprintf(stderr,
523 " InnoDB: Rolling back trx with id %lu %lu, %lu%s"
524 " rows to undo\n",
525 (ulong) ut_dulint_get_high(trx->id),
526 (ulong) ut_dulint_get_low(trx->id),
527 (ulong) rows_to_undo, unit);
528 mutex_exit(&kernel_mutex);
530 trx->mysql_thread_id = os_thread_get_curr_id();
532 trx->mysql_process_no = os_proc_get_number();
534 if (trx->dict_operation) {
535 row_mysql_lock_data_dictionary(trx);
538 que_run_threads(thr);
540 mutex_enter(&kernel_mutex);
542 while (trx->que_state != TRX_QUE_RUNNING) {
544 mutex_exit(&kernel_mutex);
546 fprintf(stderr,
547 "InnoDB: Waiting for rollback of trx id %lu to end\n",
548 (ulong) ut_dulint_get_low(trx->id));
549 os_thread_sleep(100000);
551 mutex_enter(&kernel_mutex);
554 mutex_exit(&kernel_mutex);
556 if (trx->dict_operation) {
557 /* If the transaction was for a dictionary operation, we
558 drop the relevant table, if it still exists */
560 fprintf(stderr,
561 "InnoDB: Dropping table with id %lu %lu"
562 " in recovery if it exists\n",
563 (ulong) ut_dulint_get_high(trx->table_id),
564 (ulong) ut_dulint_get_low(trx->table_id));
566 table = dict_table_get_on_id_low(trx->table_id);
568 if (table) {
569 fputs("InnoDB: Table found: dropping table ", stderr);
570 ut_print_name(stderr, trx, TRUE, table->name);
571 fputs(" in recovery\n", stderr);
573 err = row_drop_table_for_mysql(table->name, trx, TRUE);
575 ut_a(err == (int) DB_SUCCESS);
579 if (trx->dict_operation) {
580 row_mysql_unlock_data_dictionary(trx);
583 fprintf(stderr, "\nInnoDB: Rolling back of trx id %lu %lu completed\n",
584 (ulong) ut_dulint_get_high(trx->id),
585 (ulong) ut_dulint_get_low(trx->id));
586 mem_heap_free(heap);
588 trx_roll_crash_recv_trx = NULL;
590 goto loop;
592 leave_function:
593 /* We count the number of threads in os_thread_exit(). A created
594 thread should always use that to exit and not use return() to exit. */
596 os_thread_exit(NULL);
598 OS_THREAD_DUMMY_RETURN;
601 /***********************************************************************
602 Creates an undo number array. */
604 trx_undo_arr_t*
605 trx_undo_arr_create(void)
606 /*=====================*/
608 trx_undo_arr_t* arr;
609 mem_heap_t* heap;
610 ulint i;
612 heap = mem_heap_create(1024);
614 arr = mem_heap_alloc(heap, sizeof(trx_undo_arr_t));
616 arr->infos = mem_heap_alloc(heap, sizeof(trx_undo_inf_t)
617 * UNIV_MAX_PARALLELISM);
618 arr->n_cells = UNIV_MAX_PARALLELISM;
619 arr->n_used = 0;
621 arr->heap = heap;
623 for (i = 0; i < UNIV_MAX_PARALLELISM; i++) {
625 (trx_undo_arr_get_nth_info(arr, i))->in_use = FALSE;
628 return(arr);
631 /***********************************************************************
632 Frees an undo number array. */
634 void
635 trx_undo_arr_free(
636 /*==============*/
637 trx_undo_arr_t* arr) /* in: undo number array */
639 ut_ad(arr->n_used == 0);
641 mem_heap_free(arr->heap);
644 /***********************************************************************
645 Stores info of an undo log record to the array if it is not stored yet. */
646 static
647 ibool
648 trx_undo_arr_store_info(
649 /*====================*/
650 /* out: FALSE if the record already existed in the
651 array */
652 trx_t* trx, /* in: transaction */
653 dulint undo_no)/* in: undo number */
655 trx_undo_inf_t* cell;
656 trx_undo_inf_t* stored_here;
657 trx_undo_arr_t* arr;
658 ulint n_used;
659 ulint n;
660 ulint i;
662 n = 0;
663 arr = trx->undo_no_arr;
664 n_used = arr->n_used;
665 stored_here = NULL;
667 for (i = 0;; i++) {
668 cell = trx_undo_arr_get_nth_info(arr, i);
670 if (!cell->in_use) {
671 if (!stored_here) {
672 /* Not in use, we may store here */
673 cell->undo_no = undo_no;
674 cell->in_use = TRUE;
676 arr->n_used++;
678 stored_here = cell;
680 } else {
681 n++;
683 if (0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
685 if (stored_here) {
686 stored_here->in_use = FALSE;
687 ut_ad(arr->n_used > 0);
688 arr->n_used--;
691 ut_ad(arr->n_used == n_used);
693 return(FALSE);
697 if (n == n_used && stored_here) {
699 ut_ad(arr->n_used == 1 + n_used);
701 return(TRUE);
706 /***********************************************************************
707 Removes an undo number from the array. */
708 static
709 void
710 trx_undo_arr_remove_info(
711 /*=====================*/
712 trx_undo_arr_t* arr, /* in: undo number array */
713 dulint undo_no)/* in: undo number */
715 trx_undo_inf_t* cell;
716 ulint i;
718 for (i = 0;; i++) {
719 cell = trx_undo_arr_get_nth_info(arr, i);
721 if (cell->in_use
722 && 0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
724 cell->in_use = FALSE;
726 ut_ad(arr->n_used > 0);
728 arr->n_used--;
730 return;
735 /***********************************************************************
736 Gets the biggest undo number in an array. */
737 static
738 dulint
739 trx_undo_arr_get_biggest(
740 /*=====================*/
741 /* out: biggest value, ut_dulint_zero if
742 the array is empty */
743 trx_undo_arr_t* arr) /* in: undo number array */
745 trx_undo_inf_t* cell;
746 ulint n_used;
747 dulint biggest;
748 ulint n;
749 ulint i;
751 n = 0;
752 n_used = arr->n_used;
753 biggest = ut_dulint_zero;
755 for (i = 0;; i++) {
756 cell = trx_undo_arr_get_nth_info(arr, i);
758 if (cell->in_use) {
759 n++;
760 if (ut_dulint_cmp(cell->undo_no, biggest) > 0) {
762 biggest = cell->undo_no;
766 if (n == n_used) {
767 return(biggest);
772 /***************************************************************************
773 Tries truncate the undo logs. */
775 void
776 trx_roll_try_truncate(
777 /*==================*/
778 trx_t* trx) /* in: transaction */
780 trx_undo_arr_t* arr;
781 dulint limit;
782 dulint biggest;
784 ut_ad(mutex_own(&(trx->undo_mutex)));
785 ut_ad(mutex_own(&((trx->rseg)->mutex)));
787 trx->pages_undone = 0;
789 arr = trx->undo_no_arr;
791 limit = trx->undo_no;
793 if (arr->n_used > 0) {
794 biggest = trx_undo_arr_get_biggest(arr);
796 if (ut_dulint_cmp(biggest, limit) >= 0) {
798 limit = ut_dulint_add(biggest, 1);
802 if (trx->insert_undo) {
803 trx_undo_truncate_end(trx, trx->insert_undo, limit);
806 if (trx->update_undo) {
807 trx_undo_truncate_end(trx, trx->update_undo, limit);
811 /***************************************************************************
812 Pops the topmost undo log record in a single undo log and updates the info
813 about the topmost record in the undo log memory struct. */
814 static
815 trx_undo_rec_t*
816 trx_roll_pop_top_rec(
817 /*=================*/
818 /* out: undo log record, the page s-latched */
819 trx_t* trx, /* in: transaction */
820 trx_undo_t* undo, /* in: undo log */
821 mtr_t* mtr) /* in: mtr */
823 page_t* undo_page;
824 ulint offset;
825 trx_undo_rec_t* prev_rec;
826 page_t* prev_rec_page;
828 ut_ad(mutex_own(&(trx->undo_mutex)));
830 undo_page = trx_undo_page_get_s_latched(undo->space,
831 undo->top_page_no, mtr);
832 offset = undo->top_offset;
834 /* fprintf(stderr, "Thread %lu undoing trx %lu undo record %lu\n",
835 os_thread_get_curr_id(), ut_dulint_get_low(trx->id),
836 ut_dulint_get_low(undo->top_undo_no)); */
838 prev_rec = trx_undo_get_prev_rec(undo_page + offset,
839 undo->hdr_page_no, undo->hdr_offset,
840 mtr);
841 if (prev_rec == NULL) {
843 undo->empty = TRUE;
844 } else {
845 prev_rec_page = buf_frame_align(prev_rec);
847 if (prev_rec_page != undo_page) {
849 trx->pages_undone++;
852 undo->top_page_no = buf_frame_get_page_no(prev_rec_page);
853 undo->top_offset = prev_rec - prev_rec_page;
854 undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
857 return(undo_page + offset);
860 /************************************************************************
861 Pops the topmost record when the two undo logs of a transaction are seen
862 as a single stack of records ordered by their undo numbers. Inserts the
863 undo number of the popped undo record to the array of currently processed
864 undo numbers in the transaction. When the query thread finishes processing
865 of this undo record, it must be released with trx_undo_rec_release. */
867 trx_undo_rec_t*
868 trx_roll_pop_top_rec_of_trx(
869 /*========================*/
870 /* out: undo log record copied to heap, NULL
871 if none left, or if the undo number of the
872 top record would be less than the limit */
873 trx_t* trx, /* in: transaction */
874 dulint limit, /* in: least undo number we need */
875 dulint* roll_ptr,/* out: roll pointer to undo record */
876 mem_heap_t* heap) /* in: memory heap where copied */
878 trx_undo_t* undo;
879 trx_undo_t* ins_undo;
880 trx_undo_t* upd_undo;
881 trx_undo_rec_t* undo_rec;
882 trx_undo_rec_t* undo_rec_copy;
883 dulint undo_no;
884 ibool is_insert;
885 trx_rseg_t* rseg;
886 ulint progress_pct;
887 mtr_t mtr;
889 rseg = trx->rseg;
890 try_again:
891 mutex_enter(&(trx->undo_mutex));
893 if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
894 mutex_enter(&(rseg->mutex));
896 trx_roll_try_truncate(trx);
898 mutex_exit(&(rseg->mutex));
901 ins_undo = trx->insert_undo;
902 upd_undo = trx->update_undo;
904 if (!ins_undo || ins_undo->empty) {
905 undo = upd_undo;
906 } else if (!upd_undo || upd_undo->empty) {
907 undo = ins_undo;
908 } else if (ut_dulint_cmp(upd_undo->top_undo_no,
909 ins_undo->top_undo_no) > 0) {
910 undo = upd_undo;
911 } else {
912 undo = ins_undo;
915 if (!undo || undo->empty
916 || (ut_dulint_cmp(limit, undo->top_undo_no) > 0)) {
918 if ((trx->undo_no_arr)->n_used == 0) {
919 /* Rollback is ending */
921 mutex_enter(&(rseg->mutex));
923 trx_roll_try_truncate(trx);
925 mutex_exit(&(rseg->mutex));
928 mutex_exit(&(trx->undo_mutex));
930 return(NULL);
933 if (undo == ins_undo) {
934 is_insert = TRUE;
935 } else {
936 is_insert = FALSE;
939 *roll_ptr = trx_undo_build_roll_ptr(is_insert, (undo->rseg)->id,
940 undo->top_page_no,
941 undo->top_offset);
942 mtr_start(&mtr);
944 undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
946 undo_no = trx_undo_rec_get_undo_no(undo_rec);
948 ut_ad(ut_dulint_cmp(ut_dulint_add(undo_no, 1), trx->undo_no) == 0);
950 /* We print rollback progress info if we are in a crash recovery
951 and the transaction has at least 1000 row operations to undo. */
953 if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) {
955 progress_pct = 100 - (ulint)
956 ((ut_conv_dulint_to_longlong(undo_no) * 100)
957 / trx_roll_max_undo_no);
958 if (progress_pct != trx_roll_progress_printed_pct) {
959 if (trx_roll_progress_printed_pct == 0) {
960 fprintf(stderr,
961 "\nInnoDB: Progress in percents:"
962 " %lu", (ulong) progress_pct);
963 } else {
964 fprintf(stderr,
965 " %lu", (ulong) progress_pct);
967 fflush(stderr);
968 trx_roll_progress_printed_pct = progress_pct;
972 trx->undo_no = undo_no;
974 if (!trx_undo_arr_store_info(trx, undo_no)) {
975 /* A query thread is already processing this undo log record */
977 mutex_exit(&(trx->undo_mutex));
979 mtr_commit(&mtr);
981 goto try_again;
984 undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
986 mutex_exit(&(trx->undo_mutex));
988 mtr_commit(&mtr);
990 return(undo_rec_copy);
993 /************************************************************************
994 Reserves an undo log record for a query thread to undo. This should be
995 called if the query thread gets the undo log record not using the pop
996 function above. */
998 ibool
999 trx_undo_rec_reserve(
1000 /*=================*/
1001 /* out: TRUE if succeeded */
1002 trx_t* trx, /* in: transaction */
1003 dulint undo_no)/* in: undo number of the record */
1005 ibool ret;
1007 mutex_enter(&(trx->undo_mutex));
1009 ret = trx_undo_arr_store_info(trx, undo_no);
1011 mutex_exit(&(trx->undo_mutex));
1013 return(ret);
1016 /***********************************************************************
1017 Releases a reserved undo record. */
1019 void
1020 trx_undo_rec_release(
1021 /*=================*/
1022 trx_t* trx, /* in: transaction */
1023 dulint undo_no)/* in: undo number */
1025 trx_undo_arr_t* arr;
1027 mutex_enter(&(trx->undo_mutex));
1029 arr = trx->undo_no_arr;
1031 trx_undo_arr_remove_info(arr, undo_no);
1033 mutex_exit(&(trx->undo_mutex));
1036 /*************************************************************************
1037 Starts a rollback operation. */
1039 void
1040 trx_rollback(
1041 /*=========*/
1042 trx_t* trx, /* in: transaction */
1043 trx_sig_t* sig, /* in: signal starting the rollback */
1044 que_thr_t** next_thr)/* in/out: next query thread to run;
1045 if the value which is passed in is
1046 a pointer to a NULL pointer, then the
1047 calling function can start running
1048 a new query thread; if the passed value is
1049 NULL, the parameter is ignored */
1051 que_t* roll_graph;
1052 que_thr_t* thr;
1053 /* que_thr_t* thr2; */
1055 ut_ad(mutex_own(&kernel_mutex));
1056 ut_ad((trx->undo_no_arr == NULL) || ((trx->undo_no_arr)->n_used == 0));
1058 /* Initialize the rollback field in the transaction */
1060 if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
1062 trx->roll_limit = ut_dulint_zero;
1064 } else if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
1066 trx->roll_limit = (sig->savept).least_undo_no;
1068 } else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
1070 trx->roll_limit = trx->last_sql_stat_start.least_undo_no;
1071 } else {
1072 ut_error;
1075 ut_a(ut_dulint_cmp(trx->roll_limit, trx->undo_no) <= 0);
1077 trx->pages_undone = 0;
1079 if (trx->undo_no_arr == NULL) {
1080 trx->undo_no_arr = trx_undo_arr_create();
1083 /* Build a 'query' graph which will perform the undo operations */
1085 roll_graph = trx_roll_graph_build(trx);
1087 trx->graph = roll_graph;
1088 trx->que_state = TRX_QUE_ROLLING_BACK;
1090 thr = que_fork_start_command(roll_graph);
1092 ut_ad(thr);
1094 /* thr2 = que_fork_start_command(roll_graph);
1096 ut_ad(thr2); */
1098 if (next_thr && (*next_thr == NULL)) {
1099 *next_thr = thr;
1100 /* srv_que_task_enqueue_low(thr2); */
1101 } else {
1102 srv_que_task_enqueue_low(thr);
1103 /* srv_que_task_enqueue_low(thr2); */
1107 /********************************************************************
1108 Builds an undo 'query' graph for a transaction. The actual rollback is
1109 performed by executing this query graph like a query subprocedure call.
1110 The reply about the completion of the rollback will be sent by this
1111 graph. */
1113 que_t*
1114 trx_roll_graph_build(
1115 /*=================*/
1116 /* out, own: the query graph */
1117 trx_t* trx) /* in: trx handle */
1119 mem_heap_t* heap;
1120 que_fork_t* fork;
1121 que_thr_t* thr;
1122 /* que_thr_t* thr2; */
1124 ut_ad(mutex_own(&kernel_mutex));
1126 heap = mem_heap_create(512);
1127 fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap);
1128 fork->trx = trx;
1130 thr = que_thr_create(fork, heap);
1131 /* thr2 = que_thr_create(fork, heap); */
1133 thr->child = row_undo_node_create(trx, thr, heap);
1134 /* thr2->child = row_undo_node_create(trx, thr2, heap); */
1136 return(fork);
1139 /*************************************************************************
1140 Finishes error processing after the necessary partial rollback has been
1141 done. */
1142 static
1143 void
1144 trx_finish_error_processing(
1145 /*========================*/
1146 trx_t* trx) /* in: transaction */
1148 trx_sig_t* sig;
1149 trx_sig_t* next_sig;
1151 ut_ad(mutex_own(&kernel_mutex));
1153 sig = UT_LIST_GET_FIRST(trx->signals);
1155 while (sig != NULL) {
1156 next_sig = UT_LIST_GET_NEXT(signals, sig);
1158 if (sig->type == TRX_SIG_ERROR_OCCURRED) {
1160 trx_sig_remove(trx, sig);
1163 sig = next_sig;
1166 trx->que_state = TRX_QUE_RUNNING;
1169 /*************************************************************************
1170 Finishes a partial rollback operation. */
1171 static
1172 void
1173 trx_finish_partial_rollback_off_kernel(
1174 /*===================================*/
1175 trx_t* trx, /* in: transaction */
1176 que_thr_t** next_thr)/* in/out: next query thread to run;
1177 if the value which is passed in is a pointer
1178 to a NULL pointer, then the calling function
1179 can start running a new query thread; if this
1180 parameter is NULL, it is ignored */
1182 trx_sig_t* sig;
1184 ut_ad(mutex_own(&kernel_mutex));
1186 sig = UT_LIST_GET_FIRST(trx->signals);
1188 /* Remove the signal from the signal queue and send reply message
1189 to it */
1191 trx_sig_reply(sig, next_thr);
1192 trx_sig_remove(trx, sig);
1194 trx->que_state = TRX_QUE_RUNNING;
1197 /********************************************************************
1198 Finishes a transaction rollback. */
1200 void
1201 trx_finish_rollback_off_kernel(
1202 /*===========================*/
1203 que_t* graph, /* in: undo graph which can now be freed */
1204 trx_t* trx, /* in: transaction */
1205 que_thr_t** next_thr)/* in/out: next query thread to run;
1206 if the value which is passed in is
1207 a pointer to a NULL pointer, then the
1208 calling function can start running
1209 a new query thread; if this parameter is
1210 NULL, it is ignored */
1212 trx_sig_t* sig;
1213 trx_sig_t* next_sig;
1215 ut_ad(mutex_own(&kernel_mutex));
1217 ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
1219 /* Free the memory reserved by the undo graph */
1220 que_graph_free(graph);
1222 sig = UT_LIST_GET_FIRST(trx->signals);
1224 if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
1226 trx_finish_partial_rollback_off_kernel(trx, next_thr);
1228 return;
1230 } else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
1232 trx_finish_error_processing(trx);
1234 return;
1237 #ifdef UNIV_DEBUG
1238 if (lock_print_waits) {
1239 fprintf(stderr, "Trx %lu rollback finished\n",
1240 (ulong) ut_dulint_get_low(trx->id));
1242 #endif /* UNIV_DEBUG */
1244 trx_commit_off_kernel(trx);
1246 /* Remove all TRX_SIG_TOTAL_ROLLBACK signals from the signal queue and
1247 send reply messages to them */
1249 trx->que_state = TRX_QUE_RUNNING;
1251 while (sig != NULL) {
1252 next_sig = UT_LIST_GET_NEXT(signals, sig);
1254 if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
1256 trx_sig_reply(sig, next_thr);
1258 trx_sig_remove(trx, sig);
1261 sig = next_sig;
1265 /*************************************************************************
1266 Creates a rollback command node struct. */
1268 roll_node_t*
1269 roll_node_create(
1270 /*=============*/
1271 /* out, own: rollback node struct */
1272 mem_heap_t* heap) /* in: mem heap where created */
1274 roll_node_t* node;
1276 node = mem_heap_alloc(heap, sizeof(roll_node_t));
1277 node->common.type = QUE_NODE_ROLLBACK;
1278 node->state = ROLL_NODE_SEND;
1280 node->partial = FALSE;
1282 return(node);
1285 /***************************************************************
1286 Performs an execution step for a rollback command node in a query graph. */
1288 que_thr_t*
1289 trx_rollback_step(
1290 /*==============*/
1291 /* out: query thread to run next, or NULL */
1292 que_thr_t* thr) /* in: query thread */
1294 roll_node_t* node;
1295 ulint sig_no;
1296 trx_savept_t* savept;
1298 node = thr->run_node;
1300 ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK);
1302 if (thr->prev_node == que_node_get_parent(node)) {
1303 node->state = ROLL_NODE_SEND;
1306 if (node->state == ROLL_NODE_SEND) {
1307 mutex_enter(&kernel_mutex);
1309 node->state = ROLL_NODE_WAIT;
1311 if (node->partial) {
1312 sig_no = TRX_SIG_ROLLBACK_TO_SAVEPT;
1313 savept = &(node->savept);
1314 } else {
1315 sig_no = TRX_SIG_TOTAL_ROLLBACK;
1316 savept = NULL;
1319 /* Send a rollback signal to the transaction */
1321 trx_sig_send(thr_get_trx(thr), sig_no, TRX_SIG_SELF, thr,
1322 savept, NULL);
1324 thr->state = QUE_THR_SIG_REPLY_WAIT;
1326 mutex_exit(&kernel_mutex);
1328 return(NULL);
1331 ut_ad(node->state == ROLL_NODE_WAIT);
1333 thr->run_node = que_node_get_parent(node);
1335 return(thr);