1 /*****************************************************************************
3 Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc.,
15 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17 *****************************************************************************/
19 /**************************************************//**
21 Purge obsolete records
23 Created 3/14/1997 Heikki Tuuri
24 *******************************************************/
26 #include "row0purge.h"
29 #include "row0purge.ic"
33 #include "mach0data.h"
38 #include "trx0purge.h"
44 #include "row0mysql.h"
47 /*************************************************************************
48 IMPORTANT NOTE: Any operation that generates redo MUST check that there
49 is enough space in the redo log before starting that operation. This is
50 done by calling log_free_check(). The reason for checking the
51 availability of the redo log space before the start of the operation is
52 that we MUST NOT hold any synchronization objects when performing the check.
54 If you make a change in this module make sure that no codepath is
55 introduced where a call to log_free_check() is bypassed. */
/* Reviewer summary of the visible fragment. The listing omits the return
type, the declaration of `node`, the braces and the return statement.
 - ut_ad() checks that both `parent` and `heap` are non-NULL.
 - The purge_node_t itself is allocated from the caller-supplied `heap`.
 - common.type is set to QUE_NODE_PURGE and common.parent to `parent`.
 - node->heap receives a second heap from mem_heap_create(256); it is a
   different object from the `heap` argument. */
57 /********************************************************************//**
58 Creates a purge node to a query graph.
59 @return own: purge node */
62 row_purge_node_create(
63 /*==================*/
64 que_thr_t
* parent
, /*!< in: parent node, i.e., a thr node */
65 mem_heap_t
* heap
) /*!< in: memory heap where created */
69 ut_ad(parent
&& heap
);
/* The node structure lives in the caller's heap. */
71 node
= mem_heap_alloc(heap
, sizeof(purge_node_t
));
73 node
->common
.type
= QUE_NODE_PURGE
;
74 node
->common
.parent
= parent
;
/* Second heap stored in the node, separate from the `heap` argument. */
76 node
->heap
= mem_heap_create(256);
/* Reviewer summary of the visible fragment. The return type, the local
`found`, the `else` keyword, braces and the return statement are not visible.
 - When node->found_clust is already set, the cursor node->pcur is restored
   with btr_pcur_restore_position(mode, ..., mtr).
 - The second assignment to `found` (presumably the else branch -- confirm)
   looks the record up with row_search_on_row_ref() on node->table and stores
   the result in node->found_clust.
 - btr_pcur_store_position() is then called on node->pcur.
   NOTE(review): the condition guarding the store call is not visible here;
   presumably it only runs when the record was found -- confirm. */
81 /***********************************************************//**
82 Repositions the pcur in the purge node on the clustered index record,
84 @return TRUE if the record was found */
87 row_purge_reposition_pcur(
88 /*======================*/
89 ulint mode
, /*!< in: latching mode */
90 purge_node_t
* node
, /*!< in: row purge node */
91 mtr_t
* mtr
) /*!< in: mtr */
/* A previous call already located the clustered record. */
95 if (node
->found_clust
) {
96 found
= btr_pcur_restore_position(mode
, &(node
->pcur
), mtr
);
/* Lookup by row reference; the trailing arguments are not visible. */
101 found
= row_search_on_row_ref(&(node
->pcur
), mode
, node
->table
,
/* Cache the lookup result in the node. */
103 node
->found_clust
= found
;
106 btr_pcur_store_position(&(node
->pcur
), mtr
);
/* Reviewer summary of the visible fragment. The return type, most local
declarations, braces, early returns and several call arguments are missing.
NOTE(review): the header comment below lost its terminator in this listing,
so the function-name line is lexically part of that comment.
 - Works on the first index of node->table and on the cursor node->pcur.
 - row_purge_reposition_pcur(mode, node, &mtr) is called first; the branch
   commented "The record is already removed" commits the mini-transaction
   through btr_pcur_commit_specify_mtr().
 - node->roll_ptr is compared (ut_dulint_cmp) with the roll pointer read from
   the record; a mismatch takes the "Someone else has modified the record
   later: do not remove" path, which also commits the mini-transaction.
 - Record offsets are computed into the array offsets_; `heap` is passed by
   address to rec_get_offsets() and tested with UNIV_LIKELY_NULL() on both
   paths (the bodies of those tests are not visible).
 - mode == BTR_MODIFY_LEAF uses btr_cur_optimistic_delete(); the other path
   asserts mode == BTR_MODIFY_TREE and uses btr_cur_pessimistic_delete(),
   then branches on err being DB_SUCCESS or DB_OUT_OF_FILE_SPACE (branch
   bodies not visible).
 - The mini-transaction is committed at the end of the visible code. */
112 /***********************************************************//**
113 Removes a delete marked clustered index record if possible.
114 @return TRUE if success, or if not found, or if modified after the
118 row_purge_remove_clust_if_poss_low(
119 /*===============================*/
120 purge_node_t
* node
, /*!< in: row purge node */
121 ulint mode
) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
130 mem_heap_t
* heap
= NULL
;
131 ulint offsets_
[REC_OFFS_NORMAL_SIZE
];
132 rec_offs_init(offsets_
);
134 index
= dict_table_get_first_index(node
->table
);
136 pcur
= &(node
->pcur
);
137 btr_cur
= btr_pcur_get_btr_cur(pcur
);
142 success
= row_purge_reposition_pcur(mode
, node
, &mtr
);
145 /* The record is already removed */
147 btr_pcur_commit_specify_mtr(pcur
, &mtr
);
152 rec
= btr_pcur_get_rec(pcur
);
/* Compare the roll pointer in the purge node with the one in the record. */
154 if (0 != ut_dulint_cmp(node
->roll_ptr
, row_get_rec_roll_ptr(
155 rec
, index
, rec_get_offsets(
156 rec
, index
, offsets_
,
157 ULINT_UNDEFINED
, &heap
)))) {
158 if (UNIV_LIKELY_NULL(heap
)) {
161 /* Someone else has modified the record later: do not remove */
162 btr_pcur_commit_specify_mtr(pcur
, &mtr
);
167 if (UNIV_LIKELY_NULL(heap
)) {
/* The delete primitive is selected by the latch mode. */
171 if (mode
== BTR_MODIFY_LEAF
) {
172 success
= btr_cur_optimistic_delete(btr_cur
, &mtr
);
174 ut_ad(mode
== BTR_MODIFY_TREE
);
175 btr_cur_pessimistic_delete(&err
, FALSE
, btr_cur
,
178 if (err
== DB_SUCCESS
) {
180 } else if (err
== DB_OUT_OF_FILE_SPACE
) {
187 btr_pcur_commit_specify_mtr(pcur
, &mtr
);
/* Reviewer summary of the visible fragment. The return type, the locals
`success` and `n_tries`, braces, labels and jumps are not visible.
NOTE(review): the header comment below lost its terminator in this listing,
so the function-name line is lexically part of that comment.
 - row_purge_remove_clust_if_poss_low() is called with BTR_MODIFY_LEAF first
   and with BTR_MODIFY_TREE afterwards.
 - When the result is not successful and n_tries is still below
   BTR_CUR_RETRY_DELETE_N_TIMES, the thread sleeps for
   BTR_CUR_RETRY_SLEEP_TIME. Presumably a retry follows; the jump itself is
   not visible -- confirm. */
192 /***********************************************************//**
193 Removes a clustered index record if it has not been modified after the delete
197 row_purge_remove_clust_if_poss(
198 /*===========================*/
199 purge_node_t
* node
) /*!< in: row purge node */
204 /* fputs("Purge: Removing clustered record\n", stderr); */
/* First attempt uses BTR_MODIFY_LEAF. */
206 success
= row_purge_remove_clust_if_poss_low(node
, BTR_MODIFY_LEAF
);
/* Second attempt uses BTR_MODIFY_TREE. */
212 success
= row_purge_remove_clust_if_poss_low(node
, BTR_MODIFY_TREE
);
213 /* The delete operation may fail if we have little
214 file space left: TODO: easiest to crash the database
215 and restart with more file space */
217 if (!success
&& n_tries
< BTR_CUR_RETRY_DELETE_N_TIMES
) {
220 os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME
);
/* Reviewer summary of the visible fragment. The return type, most local
declarations, braces, early exits and some call arguments are not visible.
NOTE(review): the comment on the `mode` parameter lost its terminator in this
listing, so the text up to the end of the "remove warning" comment is
lexically one comment.
 - The secondary index entry is searched with row_search_index_entry() using
   the local cursor `pcur` and mini-transaction `mtr`; the not-found path
   closes the cursor.
 - A second mini-transaction, mtr_vers, is started to reposition node->pcur
   on the clustered record with BTR_SEARCH_LEAF.
 - old_has (initialised to 0) receives the result of
   row_vers_old_has_index_entry() for that clustered record.
 - mtr_vers is committed before the delete decision is evaluated.
 - The secondary record is removed when `!success || !old_has`, where
   `success` is the result of the clustered reposition.
 - mode == BTR_MODIFY_LEAF uses btr_cur_optimistic_delete(); the other path
   asserts mode == BTR_MODIFY_TREE, uses btr_cur_pessimistic_delete(), sets
   success = (err == DB_SUCCESS) and asserts with ut_a() that any failure is
   DB_OUT_OF_FILE_SPACE.
 - The local cursor is closed at the end of the visible code. */
228 /***********************************************************//**
229 Removes a secondary index entry if possible.
230 @return TRUE if success or if not found */
233 row_purge_remove_sec_if_poss_low(
234 /*=============================*/
235 purge_node_t
* node
, /*!< in: row purge node */
236 dict_index_t
* index
, /*!< in: index */
237 const dtuple_t
* entry
, /*!< in: index entry */
238 ulint mode
) /*!< in: latch mode BTR_MODIFY_LEAF or
244 ibool old_has
= 0; /* remove warning */
253 found
= row_search_index_entry(index
, entry
, mode
, &pcur
, &mtr
);
256 /* Not found. This is a legitimate condition. In a
257 rollback, InnoDB will remove secondary recs that would
258 be purged anyway. Then the actual purge will not find
259 the secondary index record. Also, the purge itself is
260 eager: if it comes to consider a secondary index
261 record, and notices it does not need to exist in the
262 index, it will remove it. Then if/when the purge
263 comes to consider the secondary index record a second
264 time, it will not exist any more in the index. */
266 /* fputs("PURGE:........sec entry not found\n", stderr); */
267 /* dtuple_print(stderr, entry); */
269 btr_pcur_close(&pcur
);
275 btr_cur
= btr_pcur_get_btr_cur(&pcur
);
277 /* We should remove the index record if no later version of the row,
278 which cannot be purged yet, requires its existence. If some requires,
279 we should do nothing. */
/* The clustered record is accessed under its own mini-transaction. */
281 mtr_start(&mtr_vers
);
283 success
= row_purge_reposition_pcur(BTR_SEARCH_LEAF
, node
, &mtr_vers
);
286 old_has
= row_vers_old_has_index_entry(
287 TRUE
, btr_pcur_get_rec(&(node
->pcur
)),
288 &mtr_vers
, index
, entry
);
291 btr_pcur_commit_specify_mtr(&(node
->pcur
), &mtr_vers
);
293 if (!success
|| !old_has
) {
294 /* Remove the index record */
296 if (mode
== BTR_MODIFY_LEAF
) {
297 success
= btr_cur_optimistic_delete(btr_cur
, &mtr
);
299 ut_ad(mode
== BTR_MODIFY_TREE
);
300 btr_cur_pessimistic_delete(&err
, FALSE
, btr_cur
,
302 success
= err
== DB_SUCCESS
;
/* Any failure other than running out of file space is treated as fatal. */
303 ut_a(success
|| err
== DB_OUT_OF_FILE_SPACE
);
307 btr_pcur_close(&pcur
);
/* Reviewer summary of the visible fragment. The return type, the locals
`success` and `n_tries`, braces, labels, jumps and the last argument of both
calls are not visible.
 - row_purge_remove_sec_if_poss_low(node, index, entry, ...) is called twice.
   NOTE(review): the latch-mode argument is cut off in both calls; presumably
   BTR_MODIFY_LEAF first and BTR_MODIFY_TREE second, as in the clustered
   variant -- confirm.
 - When the result is not successful and n_tries is still below
   BTR_CUR_RETRY_DELETE_N_TIMES, the thread sleeps for
   BTR_CUR_RETRY_SLEEP_TIME. Presumably a retry follows -- confirm. */
313 /***********************************************************//**
314 Removes a secondary index entry if possible. */
317 row_purge_remove_sec_if_poss(
318 /*=========================*/
319 purge_node_t
* node
, /*!< in: row purge node */
320 dict_index_t
* index
, /*!< in: index */
321 dtuple_t
* entry
) /*!< in: index entry */
326 /* fputs("Purge: Removing secondary record\n", stderr); */
328 success
= row_purge_remove_sec_if_poss_low(node
, index
, entry
,
335 success
= row_purge_remove_sec_if_poss_low(node
, index
, entry
,
337 /* The delete operation may fail if we have little
338 file space left: TODO: easiest to crash the database
339 and restart with more file space */
341 if (!success
&& n_tries
< BTR_CUR_RETRY_DELETE_N_TIMES
) {
345 os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME
);
/* Reviewer summary of the visible fragment. The line carrying the function
name is missing from this listing; presumably this is row_purge_del_mark(),
which is called later in this file for TRX_UNDO_DEL_MARK_REC -- confirm.
Local declarations, braces and the assignment of `index` are also missing.
 - A local heap is created with mem_heap_create(1024).
 - While node->index is not NULL, an index entry is built from node->row with
   row_build_index_entry() and handed to row_purge_remove_sec_if_poss();
   node->index then advances with dict_table_get_next_index().
 - After the loop, row_purge_remove_clust_if_poss(node) is called.
   NOTE(review): the release of the local heap is not visible here. */
353 /***********************************************************//**
354 Purges a delete marking of a record. */
359 purge_node_t
* node
) /*!< in: row purge node */
367 heap
= mem_heap_create(1024);
/* Walk the indexes starting from whatever node->index points at. */
369 while (node
->index
!= NULL
) {
372 /* Build the index entry */
373 entry
= row_build_index_entry(node
->row
, NULL
, index
, heap
);
375 row_purge_remove_sec_if_poss(node
, index
, entry
);
377 node
->index
= dict_table_get_next_index(node
->index
);
/* The clustered record is handled last, after the index loop. */
382 row_purge_remove_clust_if_poss(node
);
/* Reviewer summary of the visible fragment. The return type, the opening
conditional of the UNIV_DEBUG section, local declarations, braces, the
skip_secondaries label, mtr start/commit calls and several call arguments
are not visible.
 - When node->rec_type is TRX_UNDO_UPD_DEL_REC, or node->cmpl_info has
   UPD_NODE_NO_ORD_CHANGE set, control jumps to skip_secondaries.
 - Otherwise a local heap is created with mem_heap_create(1024) and the code
   loops while node->index is not NULL: where
   row_upd_changes_ord_field_binary() holds for node->update, the older
   version of the index entry is built from node->row and passed to
   row_purge_remove_sec_if_poss(); node->index then advances.
 - Afterwards every field of node->update whose new_val is external
   (dfield_is_ext) is processed:
     * internal_offset is derived from the new_val data pointer and asserted
       to be below UNIV_PAGE_SIZE;
     * node->roll_ptr is decoded with trx_undo_decode_roll_ptr();
     * the index lock of the first index of node->table is X-locked and
       btr_root_get() is called;
     * the undo page is fetched with buf_page_get(0, 0, page_no, RW_X_LATCH),
       i.e. with literal 0 as the first two arguments;
     * data_field = page frame + offset + internal_offset;
     * the field length is asserted to be at least BTR_EXTERN_FIELD_REF_SIZE
       and btr_free_externally_stored_field() receives the address of the
       last BTR_EXTERN_FIELD_REF_SIZE bytes of the field.
 - The wrapper macro row_purge_upd_exist_or_extern(thr,node) forwards `thr`
   in one preprocessor branch and drops it in the branch marked as the
   UNIV_DEBUG else case.
NOTE(review): the comment starting "We have to acquire an X-latch" lost its
terminator in this listing, so the lines up to the end of the following
NOTE comment are lexically one comment. */
385 /***********************************************************//**
386 Purges an update of an existing record. Also purges an update of a delete
387 marked record if that record contained an externally stored field. */
390 row_purge_upd_exist_or_extern_func(
391 /*===============================*/
393 const que_thr_t
*thr
, /*!< in: query thread */
394 #endif /* UNIV_DEBUG */
395 purge_node_t
* node
) /*!< in: row purge node */
/* Secondary indexes are skipped for these two cases. */
409 if (node
->rec_type
== TRX_UNDO_UPD_DEL_REC
410 || (node
->cmpl_info
& UPD_NODE_NO_ORD_CHANGE
)) {
412 goto skip_secondaries
;
415 heap
= mem_heap_create(1024);
417 while (node
->index
!= NULL
) {
420 if (row_upd_changes_ord_field_binary(node
->index
, node
->update
,
422 /* Build the older version of the index entry */
423 entry
= row_build_index_entry(node
->row
, NULL
,
426 row_purge_remove_sec_if_poss(node
, index
, entry
);
429 node
->index
= dict_table_get_next_index(node
->index
);
435 /* Free possible externally stored fields */
436 for (i
= 0; i
< upd_get_n_fields(node
->update
); i
++) {
438 const upd_field_t
* ufield
439 = upd_get_nth_field(node
->update
, i
);
441 if (dfield_is_ext(&ufield
->new_val
)) {
443 ulint internal_offset
;
446 /* We use the fact that new_val points to
447 node->undo_rec and get thus the offset of
448 dfield data inside the undo record. Then we
449 can calculate from node->roll_ptr the file
450 address of the new_val data */
454 dfield_get_data(&ufield
->new_val
))
457 ut_a(internal_offset
< UNIV_PAGE_SIZE
);
459 trx_undo_decode_roll_ptr(node
->roll_ptr
,
460 &is_insert
, &rseg_id
,
464 /* We have to acquire an X-latch to the clustered
467 index
= dict_table_get_first_index(node
->table
);
469 mtr_x_lock(dict_index_get_lock(index
), &mtr
);
471 /* NOTE: we must also acquire an X-latch to the
472 root page of the tree. We will need it when we
473 free pages from the tree. If the tree is of height 1,
474 the tree X-latch does NOT protect the root page,
475 because it is also a leaf page. Since we will have a
476 latch on an undo log page, we would break the
477 latching order if we would only later latch the
478 root page of such a tree! */
480 btr_root_get(index
, &mtr
);
482 /* We assume in purge of externally stored fields
483 that the space id of the undo log record is 0! */
485 block
= buf_page_get(0, 0, page_no
, RW_X_LATCH
, &mtr
);
486 buf_block_dbg_add_level(block
, SYNC_TRX_UNDO_PAGE
);
/* Address of the field data inside the latched undo page frame. */
488 data_field
= buf_block_get_frame(block
)
489 + offset
+ internal_offset
;
491 ut_a(dfield_get_len(&ufield
->new_val
)
492 >= BTR_EXTERN_FIELD_REF_SIZE
);
/* The pointer passed below addresses the trailing
BTR_EXTERN_FIELD_REF_SIZE bytes of the field. */
493 btr_free_externally_stored_field(
495 data_field
+ dfield_get_len(&ufield
->new_val
)
496 - BTR_EXTERN_FIELD_REF_SIZE
,
497 NULL
, NULL
, NULL
, 0, RB_NONE
, &mtr
);
/* Wrapper macro: only one preprocessor branch forwards `thr`. */
504 # define row_purge_upd_exist_or_extern(thr,node) \
505 row_purge_upd_exist_or_extern_func(thr,node)
506 #else /* UNIV_DEBUG */
507 # define row_purge_upd_exist_or_extern(thr,node) \
508 row_purge_upd_exist_or_extern_func(node)
509 #endif /* UNIV_DEBUG */
/* Reviewer summary of the visible fragment. The return type, most local
declarations, braces, return statements and several call arguments are not
visible.
NOTE(review): two comments lost their terminators in this listing: the one
on `updated_extern` (swallowing the `thr` parameter lines) and the one
starting "Prevent DROP TABLE" (swallowing the lines up to the "table has
been dropped" comment).
 - trx is taken from thr with thr_get_trx().
 - trx_undo_rec_get_pars() parses node->undo_rec and fills type,
   node->cmpl_info, *updated_extern, undo_no and table_id; node->rec_type is
   set to type.
 - Two early checks follow:
     * type == TRX_UNDO_UPD_DEL_REC with *updated_extern false (the body of
       this branch is not visible);
     * type == TRX_UNDO_UPD_EXIST_REC with UPD_NODE_NO_ORD_CHANGE set and
       *updated_extern false, commented "Purge requires no changes to
       indexes: we may return".
 - The data dictionary is frozen with row_mysql_freeze_data_dictionary(trx)
   and node->table is looked up by table_id while holding dict_sys->mutex.
 - A NULL table (dropped) leads to row_mysql_unfreeze_data_dictionary(trx).
   Tables with ibd_file_missing set and tables without a first index are
   also handled specially (branch bodies not visible).
 - The row reference is read into node->ref, the update vector into
   node->update (allocated from node->heap), and, unless
   UPD_NODE_NO_ORD_CHANGE is set, the partial row into node->row. */
511 /***********************************************************//**
512 Parses the row reference and other info in a modify undo log record.
513 @return TRUE if purge operation required: NOTE that then the CALLER
514 must unfreeze data dictionary! */
517 row_purge_parse_undo_rec(
518 /*=====================*/
519 purge_node_t
* node
, /*!< in: row undo node */
520 ibool
* updated_extern
,
521 /*!< out: TRUE if an externally stored field
523 que_thr_t
* thr
) /*!< in: query thread */
525 dict_index_t
* clust_index
;
537 trx
= thr_get_trx(thr
);
539 ptr
= trx_undo_rec_get_pars(
540 node
->undo_rec
, &type
, &node
->cmpl_info
,
541 updated_extern
, &undo_no
, &table_id
);
542 node
->rec_type
= type
;
544 if (type
== TRX_UNDO_UPD_DEL_REC
&& !(*updated_extern
)) {
549 ptr
= trx_undo_update_rec_get_sys_cols(ptr
, &trx_id
, &roll_ptr
,
553 if (type
== TRX_UNDO_UPD_EXIST_REC
554 && node
->cmpl_info
& UPD_NODE_NO_ORD_CHANGE
555 && !(*updated_extern
)) {
557 /* Purge requires no changes to indexes: we may return */
562 /* Prevent DROP TABLE etc. from running when we are doing the purge
565 row_mysql_freeze_data_dictionary(trx
);
567 mutex_enter(&(dict_sys
->mutex
));
569 node
->table
= dict_table_get_on_id_low(table_id
);
571 mutex_exit(&(dict_sys
->mutex
));
573 if (node
->table
== NULL
) {
574 /* The table has been dropped: no need to do purge */
576 row_mysql_unfreeze_data_dictionary(trx
);
580 if (node
->table
->ibd_file_missing
) {
581 /* We skip purge of missing .ibd files */
588 clust_index
= dict_table_get_first_index(node
->table
);
590 if (clust_index
== NULL
) {
591 /* The table was corrupt in the data dictionary */
/* From here on the undo record body is decoded piece by piece through ptr. */
596 ptr
= trx_undo_rec_get_row_ref(ptr
, clust_index
, &(node
->ref
),
599 ptr
= trx_undo_update_rec_get_update(ptr
, clust_index
, type
, trx_id
,
600 roll_ptr
, info_bits
, trx
,
601 node
->heap
, &(node
->update
));
603 /* Read to the partial row the fields that occur in indexes */
605 if (!(node
->cmpl_info
& UPD_NODE_NO_ORD_CHANGE
)) {
606 ptr
= trx_undo_rec_get_partial_row(
607 ptr
, clust_index
, &node
->row
,
608 type
== TRX_UNDO_UPD_DEL_REC
,
/* Reviewer summary of the visible fragment. The line carrying the return
type and function name is missing from this listing; presumably this is
row_purge(), which is called with (node, thr) later in this file -- confirm.
Braces, else keywords and some call arguments are also missing.
 - The next undo record is fetched with trx_purge_fetch_next_rec() into
   node->undo_rec (node->roll_ptr is passed by address).
 - A NULL undo record means purge is complete for this query thread:
   thr->run_node is set to the parent of node.
 - When the record is not &trx_purge_dummy_rec and
   row_purge_parse_undo_rec() returns true:
     * node->found_clust is reset to FALSE;
     * node->index is set to the index following the first index of
       node->table;
     * TRX_UNDO_DEL_MARK_REC dispatches to row_purge_del_mark(); otherwise
       updated_extern or TRX_UNDO_UPD_EXIST_REC dispatches to
       row_purge_upd_exist_or_extern();
     * node->pcur is closed when node->found_clust is set;
     * the data dictionary is unfrozen for the thread's transaction.
 - Cleanup: trx_purge_rec_release(node->reservation),
   mem_heap_empty(node->heap), and thr->run_node = node. */
615 /***********************************************************//**
616 Fetches an undo log record and does the purge for the recorded operation.
617 If none left, or the current purge completed, returns the control to the
618 parent node, which is always a query thread node. */
619 static __attribute__((nonnull
))
623 purge_node_t
* node
, /*!< in: row purge node */
624 que_thr_t
* thr
) /*!< in: query thread */
626 ibool updated_extern
;
631 node
->undo_rec
= trx_purge_fetch_next_rec(&node
->roll_ptr
,
634 if (!node
->undo_rec
) {
635 /* Purge completed for this query thread */
637 thr
->run_node
= que_node_get_parent(node
);
/* The dummy record is skipped; parsing decides whether any work is needed. */
642 if (node
->undo_rec
!= &trx_purge_dummy_rec
643 && row_purge_parse_undo_rec(node
, &updated_extern
, thr
)) {
644 node
->found_clust
= FALSE
;
/* Start from the index that follows the first index of the table. */
646 node
->index
= dict_table_get_next_index(
647 dict_table_get_first_index(node
->table
));
649 if (node
->rec_type
== TRX_UNDO_DEL_MARK_REC
) {
650 row_purge_del_mark(node
);
652 } else if (updated_extern
653 || node
->rec_type
== TRX_UNDO_UPD_EXIST_REC
) {
655 row_purge_upd_exist_or_extern(thr
, node
);
658 if (node
->found_clust
) {
659 btr_pcur_close(&(node
->pcur
));
/* Matches the header note of row_purge_parse_undo_rec: the caller unfreezes. */
662 row_mysql_unfreeze_data_dictionary(thr_get_trx(thr
));
665 /* Do some cleanup */
666 trx_purge_rec_release(node
->reservation
);
667 mem_heap_empty(node
->heap
);
669 thr
->run_node
= node
;
672 /***********************************************************//**
673 Does the purge operation for a single undo log record. This is a high-level
674 function used in an SQL execution graph.
675 @return query thread to run next or NULL */
680 que_thr_t
* thr
) /*!< in: query thread */
686 node
= thr
->run_node
;
688 ut_ad(que_node_get_type(node
) == QUE_NODE_PURGE
);
690 row_purge(node
, thr
);