mySQL 5.0.11 sources for tomato
[tomato.git] / release / src / router / mysql / storage / innodb_plugin / row / row0uins.c
blobaa12802d08160f4722579055ff92105a4a8a0f85
1 /*****************************************************************************
3 Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc.,
15 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 *****************************************************************************/
19 /**************************************************//**
20 @file row/row0uins.c
21 Fresh insert undo
23 Created 2/25/1997 Heikki Tuuri
24 *******************************************************/
26 #include "row0uins.h"
28 #ifdef UNIV_NONINL
29 #include "row0uins.ic"
30 #endif
32 #include "dict0dict.h"
33 #include "dict0boot.h"
34 #include "dict0crea.h"
35 #include "trx0undo.h"
36 #include "trx0roll.h"
37 #include "btr0btr.h"
38 #include "mach0data.h"
39 #include "row0undo.h"
40 #include "row0vers.h"
41 #include "trx0trx.h"
42 #include "trx0rec.h"
43 #include "row0row.h"
44 #include "row0upd.h"
45 #include "que0que.h"
46 #include "ibuf0ibuf.h"
47 #include "log0log.h"
49 /*************************************************************************
50 IMPORTANT NOTE: Any operation that generates redo MUST check that there
51 is enough space in the redo log before starting that operation. This is
52 done by calling log_free_check(). The reason for checking the
53 availability of the redo log space before the start of the operation is
54 that we MUST NOT hold any synchronization objects when performing the
55 check.
56 If you make a change in this module make sure that no codepath is
57 introduced where a call to log_free_check() is bypassed. */
59 /***************************************************************//**
60 Removes a clustered index record. The pcur in node was positioned on the
61 record, now it is detached.
62 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
63 static
64 ulint
65 row_undo_ins_remove_clust_rec(
66 /*==========================*/
67 undo_node_t* node) /*!< in: undo node */
69 btr_cur_t* btr_cur;
70 ibool success;
71 ulint err;
72 ulint n_tries = 0;
73 mtr_t mtr;
75 mtr_start(&mtr);
77 success = btr_pcur_restore_position(BTR_MODIFY_LEAF, &(node->pcur),
78 &mtr);
79 ut_a(success);
81 if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {
82 ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
84 /* Drop the index tree associated with the row in
85 SYS_INDEXES table: */
87 dict_drop_index_tree(btr_pcur_get_rec(&(node->pcur)), &mtr);
89 mtr_commit(&mtr);
91 mtr_start(&mtr);
93 success = btr_pcur_restore_position(BTR_MODIFY_LEAF,
94 &(node->pcur), &mtr);
95 ut_a(success);
98 btr_cur = btr_pcur_get_btr_cur(&(node->pcur));
100 success = btr_cur_optimistic_delete(btr_cur, &mtr);
102 btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
104 if (success) {
105 trx_undo_rec_release(node->trx, node->undo_no);
107 return(DB_SUCCESS);
109 retry:
110 /* If did not succeed, try pessimistic descent to tree */
111 mtr_start(&mtr);
113 success = btr_pcur_restore_position(BTR_MODIFY_TREE,
114 &(node->pcur), &mtr);
115 ut_a(success);
117 btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
118 trx_is_recv(node->trx)
119 ? RB_RECOVERY
120 : RB_NORMAL, &mtr);
122 /* The delete operation may fail if we have little
123 file space left: TODO: easiest to crash the database
124 and restart with more file space */
126 if (err == DB_OUT_OF_FILE_SPACE
127 && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
129 btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
131 n_tries++;
133 os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
135 goto retry;
138 btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
140 trx_undo_rec_release(node->trx, node->undo_no);
142 return(err);
145 /***************************************************************//**
146 Removes a secondary index entry if found.
147 @return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
148 static
149 ulint
150 row_undo_ins_remove_sec_low(
151 /*========================*/
152 ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
153 depending on whether we wish optimistic or
154 pessimistic descent down the index tree */
155 dict_index_t* index, /*!< in: index */
156 dtuple_t* entry) /*!< in: index entry to remove */
158 btr_pcur_t pcur;
159 btr_cur_t* btr_cur;
160 ibool found;
161 ibool success;
162 ulint err;
163 mtr_t mtr;
165 mtr_start(&mtr);
167 found = row_search_index_entry(index, entry, mode, &pcur, &mtr);
169 btr_cur = btr_pcur_get_btr_cur(&pcur);
171 if (!found) {
172 /* Not found */
174 btr_pcur_close(&pcur);
175 mtr_commit(&mtr);
177 return(DB_SUCCESS);
180 if (mode == BTR_MODIFY_LEAF) {
181 success = btr_cur_optimistic_delete(btr_cur, &mtr);
183 if (success) {
184 err = DB_SUCCESS;
185 } else {
186 err = DB_FAIL;
188 } else {
189 ut_ad(mode == BTR_MODIFY_TREE);
191 /* No need to distinguish RB_RECOVERY here, because we
192 are deleting a secondary index record: the distinction
193 between RB_NORMAL and RB_RECOVERY only matters when
194 deleting a record that contains externally stored
195 columns. */
196 ut_ad(!dict_index_is_clust(index));
197 btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
198 RB_NORMAL, &mtr);
201 btr_pcur_close(&pcur);
202 mtr_commit(&mtr);
204 return(err);
207 /***************************************************************//**
208 Removes a secondary index entry from the index if found. Tries first
209 optimistic, then pessimistic descent down the tree.
210 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
211 static
212 ulint
213 row_undo_ins_remove_sec(
214 /*====================*/
215 dict_index_t* index, /*!< in: index */
216 dtuple_t* entry) /*!< in: index entry to insert */
218 ulint err;
219 ulint n_tries = 0;
221 /* Try first optimistic descent to the B-tree */
223 err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry);
225 if (err == DB_SUCCESS) {
227 return(err);
230 /* Try then pessimistic descent to the B-tree */
231 retry:
232 err = row_undo_ins_remove_sec_low(BTR_MODIFY_TREE, index, entry);
234 /* The delete operation may fail if we have little
235 file space left: TODO: easiest to crash the database
236 and restart with more file space */
238 if (err != DB_SUCCESS && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
240 n_tries++;
242 os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
244 goto retry;
247 return(err);
250 /***********************************************************//**
251 Parses the row reference and other info in a fresh insert undo record. */
252 static
253 void
254 row_undo_ins_parse_undo_rec(
255 /*========================*/
256 undo_node_t* node) /*!< in/out: row undo node */
258 dict_index_t* clust_index;
259 byte* ptr;
260 undo_no_t undo_no;
261 dulint table_id;
262 ulint type;
263 ulint dummy;
264 ibool dummy_extern;
266 ut_ad(node);
268 ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy,
269 &dummy_extern, &undo_no, &table_id);
270 ut_ad(type == TRX_UNDO_INSERT_REC);
271 node->rec_type = type;
273 node->update = NULL;
274 node->table = dict_table_get_on_id(table_id, node->trx);
276 /* Skip the UNDO if we can't find the table or the .ibd file. */
277 if (UNIV_UNLIKELY(node->table == NULL)) {
278 } else if (UNIV_UNLIKELY(node->table->ibd_file_missing)) {
279 node->table = NULL;
280 } else {
281 clust_index = dict_table_get_first_index(node->table);
283 if (clust_index != NULL) {
284 ptr = trx_undo_rec_get_row_ref(
285 ptr, clust_index, &node->ref, node->heap);
286 } else {
287 ut_print_timestamp(stderr);
288 fprintf(stderr, " InnoDB: table ");
289 ut_print_name(stderr, node->trx, TRUE,
290 node->table->name);
291 fprintf(stderr, " has no indexes, "
292 "ignoring the table\n");
294 node->table = NULL;
299 /***********************************************************//**
300 Undoes a fresh insert of a row to a table. A fresh insert means that
301 the same clustered index unique key did not have any record, even delete
302 marked, at the time of the insert. InnoDB is eager in a rollback:
303 if it figures out that an index record will be removed in the purge
304 anyway, it will remove it in the rollback.
305 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
306 UNIV_INTERN
307 ulint
308 row_undo_ins(
309 /*=========*/
310 undo_node_t* node) /*!< in: row undo node */
312 ut_ad(node);
313 ut_ad(node->state == UNDO_NODE_INSERT);
315 row_undo_ins_parse_undo_rec(node);
317 if (!node->table || !row_undo_search_clust_to_pcur(node)) {
318 trx_undo_rec_release(node->trx, node->undo_no);
320 return(DB_SUCCESS);
323 /* Iterate over all the indexes and undo the insert.*/
325 /* Skip the clustered index (the first index) */
326 node->index = dict_table_get_next_index(
327 dict_table_get_first_index(node->table));
329 while (node->index != NULL) {
330 dtuple_t* entry;
331 ulint err;
333 entry = row_build_index_entry(node->row, node->ext,
334 node->index, node->heap);
335 if (UNIV_UNLIKELY(!entry)) {
336 /* The database must have crashed after
337 inserting a clustered index record but before
338 writing all the externally stored columns of
339 that record, or a statement is being rolled
340 back because an error occurred while storing
341 off-page columns.
343 Because secondary index entries are inserted
344 after the clustered index record, we may
345 assume that the secondary index record does
346 not exist. */
347 } else {
348 log_free_check();
349 err = row_undo_ins_remove_sec(node->index, entry);
351 if (err != DB_SUCCESS) {
353 return(err);
357 node->index = dict_table_get_next_index(node->index);
360 log_free_check();
361 return(row_undo_ins_remove_clust_rec(node));