mySQL 5.0.11 sources for tomato
[tomato.git] / release / src / router / mysql / storage / innodb_plugin / row / row0purge.c
blob4d4c1afc458e673aac52f675758dd11cb67654d4
1 /*****************************************************************************
3 Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc.,
15 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17 *****************************************************************************/
19 /**************************************************//**
20 @file row/row0purge.c
21 Purge obsolete records
23 Created 3/14/1997 Heikki Tuuri
24 *******************************************************/
26 #include "row0purge.h"
28 #ifdef UNIV_NONINL
29 #include "row0purge.ic"
30 #endif
32 #include "fsp0fsp.h"
33 #include "mach0data.h"
34 #include "trx0rseg.h"
35 #include "trx0trx.h"
36 #include "trx0roll.h"
37 #include "trx0undo.h"
38 #include "trx0purge.h"
39 #include "trx0rec.h"
40 #include "que0que.h"
41 #include "row0row.h"
42 #include "row0upd.h"
43 #include "row0vers.h"
44 #include "row0mysql.h"
45 #include "log0log.h"
47 /*************************************************************************
48 IMPORTANT NOTE: Any operation that generates redo MUST check that there
49 is enough space in the redo log before that operation. This is
50 done by calling log_free_check(). The reason for checking the
51 availability of the redo log space before the start of the operation is
52 that we MUST not hold any synchronization objects when performing the
53 check.
54 If you make a change in this module make sure that no codepath is
55 introduced where a call to log_free_check() is bypassed. */
57 /********************************************************************//**
58 Creates a purge node to a query graph.
59 @return own: purge node */
60 UNIV_INTERN
61 purge_node_t*
62 row_purge_node_create(
63 /*==================*/
64 que_thr_t* parent, /*!< in: parent node, i.e., a thr node */
65 mem_heap_t* heap) /*!< in: memory heap where created */
67 purge_node_t* node;
69 ut_ad(parent && heap);
71 node = mem_heap_alloc(heap, sizeof(purge_node_t));
73 node->common.type = QUE_NODE_PURGE;
74 node->common.parent = parent;
76 node->heap = mem_heap_create(256);
78 return(node);
81 /***********************************************************//**
82 Repositions the pcur in the purge node on the clustered index record,
83 if found.
84 @return TRUE if the record was found */
85 static
86 ibool
87 row_purge_reposition_pcur(
88 /*======================*/
89 ulint mode, /*!< in: latching mode */
90 purge_node_t* node, /*!< in: row purge node */
91 mtr_t* mtr) /*!< in: mtr */
93 ibool found;
95 if (node->found_clust) {
96 found = btr_pcur_restore_position(mode, &(node->pcur), mtr);
98 return(found);
101 found = row_search_on_row_ref(&(node->pcur), mode, node->table,
102 node->ref, mtr);
103 node->found_clust = found;
105 if (found) {
106 btr_pcur_store_position(&(node->pcur), mtr);
109 return(found);
112 /***********************************************************//**
113 Removes a delete marked clustered index record if possible.
114 @return TRUE if success, or if not found, or if modified after the
115 delete marking */
116 static
117 ibool
118 row_purge_remove_clust_if_poss_low(
119 /*===============================*/
120 purge_node_t* node, /*!< in: row purge node */
121 ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
123 dict_index_t* index;
124 btr_pcur_t* pcur;
125 btr_cur_t* btr_cur;
126 ibool success;
127 ulint err;
128 mtr_t mtr;
129 rec_t* rec;
130 mem_heap_t* heap = NULL;
131 ulint offsets_[REC_OFFS_NORMAL_SIZE];
132 rec_offs_init(offsets_);
134 index = dict_table_get_first_index(node->table);
136 pcur = &(node->pcur);
137 btr_cur = btr_pcur_get_btr_cur(pcur);
139 log_free_check();
140 mtr_start(&mtr);
142 success = row_purge_reposition_pcur(mode, node, &mtr);
144 if (!success) {
145 /* The record is already removed */
147 btr_pcur_commit_specify_mtr(pcur, &mtr);
149 return(TRUE);
152 rec = btr_pcur_get_rec(pcur);
154 if (0 != ut_dulint_cmp(node->roll_ptr, row_get_rec_roll_ptr(
155 rec, index, rec_get_offsets(
156 rec, index, offsets_,
157 ULINT_UNDEFINED, &heap)))) {
158 if (UNIV_LIKELY_NULL(heap)) {
159 mem_heap_free(heap);
161 /* Someone else has modified the record later: do not remove */
162 btr_pcur_commit_specify_mtr(pcur, &mtr);
164 return(TRUE);
167 if (UNIV_LIKELY_NULL(heap)) {
168 mem_heap_free(heap);
171 if (mode == BTR_MODIFY_LEAF) {
172 success = btr_cur_optimistic_delete(btr_cur, &mtr);
173 } else {
174 ut_ad(mode == BTR_MODIFY_TREE);
175 btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
176 RB_NONE, &mtr);
178 if (err == DB_SUCCESS) {
179 success = TRUE;
180 } else if (err == DB_OUT_OF_FILE_SPACE) {
181 success = FALSE;
182 } else {
183 ut_error;
187 btr_pcur_commit_specify_mtr(pcur, &mtr);
189 return(success);
192 /***********************************************************//**
193 Removes a clustered index record if it has not been modified after the delete
194 marking. */
195 static
196 void
197 row_purge_remove_clust_if_poss(
198 /*===========================*/
199 purge_node_t* node) /*!< in: row purge node */
201 ibool success;
202 ulint n_tries = 0;
204 /* fputs("Purge: Removing clustered record\n", stderr); */
206 success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF);
207 if (success) {
209 return;
211 retry:
212 success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_TREE);
213 /* The delete operation may fail if we have little
214 file space left: TODO: easiest to crash the database
215 and restart with more file space */
217 if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
218 n_tries++;
220 os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
222 goto retry;
225 ut_a(success);
228 /***********************************************************//**
229 Removes a secondary index entry if possible.
230 @return TRUE if success or if not found */
231 static
232 ibool
233 row_purge_remove_sec_if_poss_low(
234 /*=============================*/
235 purge_node_t* node, /*!< in: row purge node */
236 dict_index_t* index, /*!< in: index */
237 const dtuple_t* entry, /*!< in: index entry */
238 ulint mode) /*!< in: latch mode BTR_MODIFY_LEAF or
239 BTR_MODIFY_TREE */
241 btr_pcur_t pcur;
242 btr_cur_t* btr_cur;
243 ibool success;
244 ibool old_has = 0; /* remove warning */
245 ibool found;
246 ulint err;
247 mtr_t mtr;
248 mtr_t mtr_vers;
250 log_free_check();
251 mtr_start(&mtr);
253 found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
255 if (!found) {
256 /* Not found. This is a legitimate condition. In a
257 rollback, InnoDB will remove secondary recs that would
258 be purged anyway. Then the actual purge will not find
259 the secondary index record. Also, the purge itself is
260 eager: if it comes to consider a secondary index
261 record, and notices it does not need to exist in the
262 index, it will remove it. Then if/when the purge
263 comes to consider the secondary index record a second
264 time, it will not exist any more in the index. */
266 /* fputs("PURGE:........sec entry not found\n", stderr); */
267 /* dtuple_print(stderr, entry); */
269 btr_pcur_close(&pcur);
270 mtr_commit(&mtr);
272 return(TRUE);
275 btr_cur = btr_pcur_get_btr_cur(&pcur);
277 /* We should remove the index record if no later version of the row,
278 which cannot be purged yet, requires its existence. If some requires,
279 we should do nothing. */
281 mtr_start(&mtr_vers);
283 success = row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr_vers);
285 if (success) {
286 old_has = row_vers_old_has_index_entry(
287 TRUE, btr_pcur_get_rec(&(node->pcur)),
288 &mtr_vers, index, entry);
291 btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);
293 if (!success || !old_has) {
294 /* Remove the index record */
296 if (mode == BTR_MODIFY_LEAF) {
297 success = btr_cur_optimistic_delete(btr_cur, &mtr);
298 } else {
299 ut_ad(mode == BTR_MODIFY_TREE);
300 btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
301 RB_NONE, &mtr);
302 success = err == DB_SUCCESS;
303 ut_a(success || err == DB_OUT_OF_FILE_SPACE);
307 btr_pcur_close(&pcur);
308 mtr_commit(&mtr);
310 return(success);
313 /***********************************************************//**
314 Removes a secondary index entry if possible. */
315 UNIV_INLINE
316 void
317 row_purge_remove_sec_if_poss(
318 /*=========================*/
319 purge_node_t* node, /*!< in: row purge node */
320 dict_index_t* index, /*!< in: index */
321 dtuple_t* entry) /*!< in: index entry */
323 ibool success;
324 ulint n_tries = 0;
326 /* fputs("Purge: Removing secondary record\n", stderr); */
328 success = row_purge_remove_sec_if_poss_low(node, index, entry,
329 BTR_MODIFY_LEAF);
330 if (success) {
332 return;
334 retry:
335 success = row_purge_remove_sec_if_poss_low(node, index, entry,
336 BTR_MODIFY_TREE);
337 /* The delete operation may fail if we have little
338 file space left: TODO: easiest to crash the database
339 and restart with more file space */
341 if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
343 n_tries++;
345 os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
347 goto retry;
350 ut_a(success);
353 /***********************************************************//**
354 Purges a delete marking of a record. */
355 static
356 void
357 row_purge_del_mark(
358 /*===============*/
359 purge_node_t* node) /*!< in: row purge node */
361 mem_heap_t* heap;
362 dtuple_t* entry;
363 dict_index_t* index;
365 ut_ad(node);
367 heap = mem_heap_create(1024);
369 while (node->index != NULL) {
370 index = node->index;
372 /* Build the index entry */
373 entry = row_build_index_entry(node->row, NULL, index, heap);
374 ut_a(entry);
375 row_purge_remove_sec_if_poss(node, index, entry);
377 node->index = dict_table_get_next_index(node->index);
380 mem_heap_free(heap);
382 row_purge_remove_clust_if_poss(node);
385 /***********************************************************//**
386 Purges an update of an existing record. Also purges an update of a delete
387 marked record if that record contained an externally stored field. */
388 static
389 void
390 row_purge_upd_exist_or_extern_func(
391 /*===============================*/
392 #ifdef UNIV_DEBUG
393 const que_thr_t*thr, /*!< in: query thread */
394 #endif /* UNIV_DEBUG */
395 purge_node_t* node) /*!< in: row purge node */
397 mem_heap_t* heap;
398 dtuple_t* entry;
399 dict_index_t* index;
400 ibool is_insert;
401 ulint rseg_id;
402 ulint page_no;
403 ulint offset;
404 ulint i;
405 mtr_t mtr;
407 ut_ad(node);
409 if (node->rec_type == TRX_UNDO_UPD_DEL_REC
410 || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
412 goto skip_secondaries;
415 heap = mem_heap_create(1024);
417 while (node->index != NULL) {
418 index = node->index;
420 if (row_upd_changes_ord_field_binary(node->index, node->update,
421 thr, NULL, NULL)) {
422 /* Build the older version of the index entry */
423 entry = row_build_index_entry(node->row, NULL,
424 index, heap);
425 ut_a(entry);
426 row_purge_remove_sec_if_poss(node, index, entry);
429 node->index = dict_table_get_next_index(node->index);
432 mem_heap_free(heap);
434 skip_secondaries:
435 /* Free possible externally stored fields */
436 for (i = 0; i < upd_get_n_fields(node->update); i++) {
438 const upd_field_t* ufield
439 = upd_get_nth_field(node->update, i);
441 if (dfield_is_ext(&ufield->new_val)) {
442 buf_block_t* block;
443 ulint internal_offset;
444 byte* data_field;
446 /* We use the fact that new_val points to
447 node->undo_rec and get thus the offset of
448 dfield data inside the undo record. Then we
449 can calculate from node->roll_ptr the file
450 address of the new_val data */
452 internal_offset
453 = ((const byte*)
454 dfield_get_data(&ufield->new_val))
455 - node->undo_rec;
457 ut_a(internal_offset < UNIV_PAGE_SIZE);
459 trx_undo_decode_roll_ptr(node->roll_ptr,
460 &is_insert, &rseg_id,
461 &page_no, &offset);
462 mtr_start(&mtr);
464 /* We have to acquire an X-latch to the clustered
465 index tree */
467 index = dict_table_get_first_index(node->table);
469 mtr_x_lock(dict_index_get_lock(index), &mtr);
471 /* NOTE: we must also acquire an X-latch to the
472 root page of the tree. We will need it when we
473 free pages from the tree. If the tree is of height 1,
474 the tree X-latch does NOT protect the root page,
475 because it is also a leaf page. Since we will have a
476 latch on an undo log page, we would break the
477 latching order if we would only later latch the
478 root page of such a tree! */
480 btr_root_get(index, &mtr);
482 /* We assume in purge of externally stored fields
483 that the space id of the undo log record is 0! */
485 block = buf_page_get(0, 0, page_no, RW_X_LATCH, &mtr);
486 buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
488 data_field = buf_block_get_frame(block)
489 + offset + internal_offset;
491 ut_a(dfield_get_len(&ufield->new_val)
492 >= BTR_EXTERN_FIELD_REF_SIZE);
493 btr_free_externally_stored_field(
494 index,
495 data_field + dfield_get_len(&ufield->new_val)
496 - BTR_EXTERN_FIELD_REF_SIZE,
497 NULL, NULL, NULL, 0, RB_NONE, &mtr);
498 mtr_commit(&mtr);
503 #ifdef UNIV_DEBUG
504 # define row_purge_upd_exist_or_extern(thr,node) \
505 row_purge_upd_exist_or_extern_func(thr,node)
506 #else /* UNIV_DEBUG */
507 # define row_purge_upd_exist_or_extern(thr,node) \
508 row_purge_upd_exist_or_extern_func(node)
509 #endif /* UNIV_DEBUG */
511 /***********************************************************//**
512 Parses the row reference and other info in a modify undo log record.
513 @return TRUE if purge operation required: NOTE that then the CALLER
514 must unfreeze data dictionary! */
515 static
516 ibool
517 row_purge_parse_undo_rec(
518 /*=====================*/
519 purge_node_t* node, /*!< in: row undo node */
520 ibool* updated_extern,
521 /*!< out: TRUE if an externally stored field
522 was updated */
523 que_thr_t* thr) /*!< in: query thread */
525 dict_index_t* clust_index;
526 byte* ptr;
527 trx_t* trx;
528 undo_no_t undo_no;
529 dulint table_id;
530 trx_id_t trx_id;
531 roll_ptr_t roll_ptr;
532 ulint info_bits;
533 ulint type;
535 ut_ad(node && thr);
537 trx = thr_get_trx(thr);
539 ptr = trx_undo_rec_get_pars(
540 node->undo_rec, &type, &node->cmpl_info,
541 updated_extern, &undo_no, &table_id);
542 node->rec_type = type;
544 if (type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) {
546 return(FALSE);
549 ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
550 &info_bits);
551 node->table = NULL;
553 if (type == TRX_UNDO_UPD_EXIST_REC
554 && node->cmpl_info & UPD_NODE_NO_ORD_CHANGE
555 && !(*updated_extern)) {
557 /* Purge requires no changes to indexes: we may return */
559 return(FALSE);
562 /* Prevent DROP TABLE etc. from running when we are doing the purge
563 for this row */
565 row_mysql_freeze_data_dictionary(trx);
567 mutex_enter(&(dict_sys->mutex));
569 node->table = dict_table_get_on_id_low(table_id);
571 mutex_exit(&(dict_sys->mutex));
573 if (node->table == NULL) {
574 /* The table has been dropped: no need to do purge */
575 err_exit:
576 row_mysql_unfreeze_data_dictionary(trx);
577 return(FALSE);
580 if (node->table->ibd_file_missing) {
581 /* We skip purge of missing .ibd files */
583 node->table = NULL;
585 goto err_exit;
588 clust_index = dict_table_get_first_index(node->table);
590 if (clust_index == NULL) {
591 /* The table was corrupt in the data dictionary */
593 goto err_exit;
596 ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
597 node->heap);
599 ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
600 roll_ptr, info_bits, trx,
601 node->heap, &(node->update));
603 /* Read to the partial row the fields that occur in indexes */
605 if (!(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
606 ptr = trx_undo_rec_get_partial_row(
607 ptr, clust_index, &node->row,
608 type == TRX_UNDO_UPD_DEL_REC,
609 node->heap);
612 return(TRUE);
615 /***********************************************************//**
616 Fetches an undo log record and does the purge for the recorded operation.
617 If none left, or the current purge completed, returns the control to the
618 parent node, which is always a query thread node. */
619 static __attribute__((nonnull))
620 void
621 row_purge(
622 /*======*/
623 purge_node_t* node, /*!< in: row purge node */
624 que_thr_t* thr) /*!< in: query thread */
626 ibool updated_extern;
628 ut_ad(node);
629 ut_ad(thr);
631 node->undo_rec = trx_purge_fetch_next_rec(&node->roll_ptr,
632 &node->reservation,
633 node->heap);
634 if (!node->undo_rec) {
635 /* Purge completed for this query thread */
637 thr->run_node = que_node_get_parent(node);
639 return;
642 if (node->undo_rec != &trx_purge_dummy_rec
643 && row_purge_parse_undo_rec(node, &updated_extern, thr)) {
644 node->found_clust = FALSE;
646 node->index = dict_table_get_next_index(
647 dict_table_get_first_index(node->table));
649 if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
650 row_purge_del_mark(node);
652 } else if (updated_extern
653 || node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
655 row_purge_upd_exist_or_extern(thr, node);
658 if (node->found_clust) {
659 btr_pcur_close(&(node->pcur));
662 row_mysql_unfreeze_data_dictionary(thr_get_trx(thr));
665 /* Do some cleanup */
666 trx_purge_rec_release(node->reservation);
667 mem_heap_empty(node->heap);
669 thr->run_node = node;
672 /***********************************************************//**
673 Does the purge operation for a single undo log record. This is a high-level
674 function used in an SQL execution graph.
675 @return query thread to run next or NULL */
676 UNIV_INTERN
677 que_thr_t*
678 row_purge_step(
679 /*===========*/
680 que_thr_t* thr) /*!< in: query thread */
682 purge_node_t* node;
684 ut_ad(thr);
686 node = thr->run_node;
688 ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
690 row_purge(node, thr);
692 return(thr);