mySQL 5.0.11 sources for tomato
[tomato.git] / release / src / router / mysql / storage / innobase / row / row0ins.c
blob678293b492b8ea4c7ae3160ee26ce686fff21318
1 /******************************************************
2 Insert into a table
4 (c) 1996 Innobase Oy
6 Created 4/20/1996 Heikki Tuuri
7 *******************************************************/
9 #include "my_global.h" /* HAVE_* */
10 #include "m_string.h" /* for my_sys.h */
11 #include "my_sys.h" /* DEBUG_SYNC_C */
12 #include "row0ins.h"
14 #ifdef UNIV_NONINL
15 #include "row0ins.ic"
16 #endif
18 #include "dict0dict.h"
19 #include "dict0boot.h"
20 #include "trx0undo.h"
21 #include "btr0btr.h"
22 #include "btr0cur.h"
23 #include "mach0data.h"
24 #include "que0que.h"
25 #include "row0upd.h"
26 #include "row0sel.h"
27 #include "row0row.h"
28 #include "rem0cmp.h"
29 #include "lock0lock.h"
30 #include "log0log.h"
31 #include "eval0eval.h"
32 #include "data0data.h"
33 #include "usr0sess.h"
34 #include "buf0lru.h"
/* Two distinct selector values. Judging only by the names they presumably
stand for "previous" and "next" -- NOTE(review): neither macro is used in the
visible part of this file, so confirm the meaning against the actual users. */
36 #define ROW_INS_PREV 1
37 #define ROW_INS_NEXT 2
40 /*********************************************************************
41 This prototype is copied from /mysql/sql/ha_innodb.cc.
42 Invalidates the MySQL query cache for the table.
43 NOTE that the exact prototype of this function has to be in
44 /innobase/row/row0ins.c! */
45 extern
46 void
47 innobase_invalidate_query_cache(
48 /*============================*/
49 trx_t* trx, /* in: transaction which modifies the table */
50 char* full_name, /* in: concatenation of database name, null
51 char '\0', table name, null char'\0';
52 NOTE that in Windows this is always
53 in LOWER CASE! */
54 ulint full_name_len); /* in: full name length where also the null
55 chars count */
57 /*************************************************************************
58 Creates an insert node struct. */
60 ins_node_t*
61 ins_node_create(
62 /*============*/
63 /* out, own: insert node struct */
64 ulint ins_type, /* in: INS_VALUES, ... */
65 dict_table_t* table, /* in: table where to insert */
66 mem_heap_t* heap) /* in: mem heap where created */
68 ins_node_t* node;
70 node = mem_heap_alloc(heap, sizeof(ins_node_t));
72 node->common.type = QUE_NODE_INSERT;
74 node->ins_type = ins_type;
76 node->state = INS_NODE_SET_IX_LOCK;
77 node->table = table;
78 node->index = NULL;
79 node->entry = NULL;
81 node->select = NULL;
83 node->trx_id = ut_dulint_zero;
85 node->entry_sys_heap = mem_heap_create(128);
87 node->magic_n = INS_NODE_MAGIC_N;
89 return(node);
92 /***************************************************************
93 Creates an entry template for each index of a table. */
94 static
95 void
96 ins_node_create_entry_list(
97 /*=======================*/
98 ins_node_t* node) /* in: row insert node */
100 dict_index_t* index;
101 dtuple_t* entry;
103 ut_ad(node->entry_sys_heap);
105 UT_LIST_INIT(node->entry_list);
107 index = dict_table_get_first_index(node->table);
109 while (index != NULL) {
110 entry = row_build_index_entry(node->row, index,
111 node->entry_sys_heap);
112 UT_LIST_ADD_LAST(tuple_list, node->entry_list, entry);
114 index = dict_table_get_next_index(index);
118 /*********************************************************************
119 Adds system field buffers to a row. */
120 static
121 void
122 row_ins_alloc_sys_fields(
123 /*=====================*/
124 ins_node_t* node) /* in: insert node */
126 dtuple_t* row;
127 dict_table_t* table;
128 mem_heap_t* heap;
129 const dict_col_t* col;
130 dfield_t* dfield;
131 byte* ptr;
133 row = node->row;
134 table = node->table;
135 heap = node->entry_sys_heap;
137 ut_ad(row && table && heap);
138 ut_ad(dtuple_get_n_fields(row) == dict_table_get_n_cols(table));
140 /* 1. Allocate buffer for row id */
142 col = dict_table_get_sys_col(table, DATA_ROW_ID);
144 dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
146 ptr = mem_heap_zalloc(heap, DATA_ROW_ID_LEN);
148 dfield_set_data(dfield, ptr, DATA_ROW_ID_LEN);
150 node->row_id_buf = ptr;
152 /* 3. Allocate buffer for trx id */
154 col = dict_table_get_sys_col(table, DATA_TRX_ID);
156 dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
157 ptr = mem_heap_zalloc(heap, DATA_TRX_ID_LEN);
159 dfield_set_data(dfield, ptr, DATA_TRX_ID_LEN);
161 node->trx_id_buf = ptr;
163 /* 4. Allocate buffer for roll ptr */
165 col = dict_table_get_sys_col(table, DATA_ROLL_PTR);
167 dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
168 ptr = mem_heap_zalloc(heap, DATA_ROLL_PTR_LEN);
170 dfield_set_data(dfield, ptr, DATA_ROLL_PTR_LEN);
173 /*************************************************************************
174 Sets a new row to insert for an INS_DIRECT node. This function is only used
175 if we have constructed the row separately, which is a rare case; this
176 function is quite slow. */
178 void
179 ins_node_set_new_row(
180 /*=================*/
181 ins_node_t* node, /* in: insert node */
182 dtuple_t* row) /* in: new row (or first row) for the node */
184 node->state = INS_NODE_SET_IX_LOCK;
185 node->index = NULL;
186 node->entry = NULL;
188 node->row = row;
190 mem_heap_empty(node->entry_sys_heap);
192 /* Create templates for index entries */
194 ins_node_create_entry_list(node);
196 /* Allocate from entry_sys_heap buffers for sys fields */
198 row_ins_alloc_sys_fields(node);
200 /* As we allocated a new trx id buf, the trx id should be written
201 there again: */
203 node->trx_id = ut_dulint_zero;
206 /***********************************************************************
207 Does an insert operation by updating a delete-marked existing record
208 in the index. This situation can occur if the delete-marked record is
209 kept in the index for consistent reads. */
210 static
211 ulint
212 row_ins_sec_index_entry_by_modify(
213 /*==============================*/
214 /* out: DB_SUCCESS or error code */
215 ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
216 depending on whether mtr holds just a leaf
217 latch or also a tree latch */
218 btr_cur_t* cursor, /* in: B-tree cursor */
219 dtuple_t* entry, /* in: index entry to insert */
220 que_thr_t* thr, /* in: query thread */
221 mtr_t* mtr) /* in: mtr */
223 big_rec_t* dummy_big_rec;
224 mem_heap_t* heap;
225 upd_t* update;
226 rec_t* rec;
227 ulint err;
229 rec = btr_cur_get_rec(cursor);
231 ut_ad((cursor->index->type & DICT_CLUSTERED) == 0);
232 ut_ad(rec_get_deleted_flag(rec,
233 dict_table_is_comp(cursor->index->table)));
235 /* We know that in the alphabetical ordering, entry and rec are
236 identified. But in their binary form there may be differences if
237 there are char fields in them. Therefore we have to calculate the
238 difference. */
240 heap = mem_heap_create(1024);
242 update = row_upd_build_sec_rec_difference_binary(
243 cursor->index, entry, rec, thr_get_trx(thr), heap);
244 if (mode == BTR_MODIFY_LEAF) {
245 /* Try an optimistic updating of the record, keeping changes
246 within the page */
248 err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG, cursor,
249 update, 0, thr, mtr);
250 if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
251 err = DB_FAIL;
253 } else {
254 ut_a(mode == BTR_MODIFY_TREE);
255 if (buf_LRU_buf_pool_running_out()) {
257 err = DB_LOCK_TABLE_FULL;
259 goto func_exit;
262 err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG, cursor,
263 &dummy_big_rec, update,
264 0, thr, mtr);
266 func_exit:
267 mem_heap_free(heap);
269 return(err);
272 /***********************************************************************
273 Does an insert operation by delete unmarking and updating a delete marked
274 existing record in the index. This situation can occur if the delete marked
275 record is kept in the index for consistent reads. */
276 static
277 ulint
278 row_ins_clust_index_entry_by_modify(
279 /*================================*/
280 /* out: DB_SUCCESS, DB_FAIL, or error code */
281 ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
282 depending on whether mtr holds just a leaf
283 latch or also a tree latch */
284 btr_cur_t* cursor, /* in: B-tree cursor */
285 big_rec_t** big_rec,/* out: possible big rec vector of fields
286 which have to be stored externally by the
287 caller */
288 dtuple_t* entry, /* in: index entry to insert */
289 ulint* ext_vec,/* in: array containing field numbers of
290 externally stored fields in entry, or NULL */
291 ulint n_ext_vec,/* in: number of fields in ext_vec */
292 que_thr_t* thr, /* in: query thread */
293 mtr_t* mtr) /* in: mtr */
295 mem_heap_t* heap;
296 rec_t* rec;
297 upd_t* update;
298 ulint err;
300 ut_ad(cursor->index->type & DICT_CLUSTERED);
302 *big_rec = NULL;
304 rec = btr_cur_get_rec(cursor);
306 ut_ad(rec_get_deleted_flag(rec,
307 dict_table_is_comp(cursor->index->table)));
309 heap = mem_heap_create(1024);
311 /* Build an update vector containing all the fields to be modified;
312 NOTE that this vector may NOT contain system columns trx_id or
313 roll_ptr */
315 update = row_upd_build_difference_binary(cursor->index, entry, ext_vec,
316 n_ext_vec, rec,
317 thr_get_trx(thr), heap);
318 if (mode == BTR_MODIFY_LEAF) {
319 /* Try optimistic updating of the record, keeping changes
320 within the page */
322 err = btr_cur_optimistic_update(0, cursor, update, 0, thr,
323 mtr);
324 if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
325 err = DB_FAIL;
327 } else {
328 ut_a(mode == BTR_MODIFY_TREE);
329 if (buf_LRU_buf_pool_running_out()) {
331 err = DB_LOCK_TABLE_FULL;
333 goto func_exit;
335 err = btr_cur_pessimistic_update(0, cursor, big_rec, update,
336 0, thr, mtr);
338 func_exit:
339 mem_heap_free(heap);
341 return(err);
344 /*************************************************************************
345 Returns TRUE if in a cascaded update/delete an ancestor node of node
346 updates (not DELETE, but UPDATE) table. */
347 static
348 ibool
349 row_ins_cascade_ancestor_updates_table(
350 /*===================================*/
351 /* out: TRUE if an ancestor updates table */
352 que_node_t* node, /* in: node in a query graph */
353 dict_table_t* table) /* in: table */
355 que_node_t* parent;
356 upd_node_t* upd_node;
358 parent = que_node_get_parent(node);
360 while (que_node_get_type(parent) == QUE_NODE_UPDATE) {
362 upd_node = parent;
364 if (upd_node->table == table && upd_node->is_delete == FALSE) {
366 return(TRUE);
369 parent = que_node_get_parent(parent);
371 ut_a(parent);
374 return(FALSE);
377 /*************************************************************************
378 Returns the number of ancestor UPDATE or DELETE nodes of a
379 cascaded update/delete node. */
380 static
381 ulint
382 row_ins_cascade_n_ancestors(
383 /*========================*/
384 /* out: number of ancestors */
385 que_node_t* node) /* in: node in a query graph */
387 que_node_t* parent;
388 ulint n_ancestors = 0;
390 parent = que_node_get_parent(node);
392 while (que_node_get_type(parent) == QUE_NODE_UPDATE) {
393 n_ancestors++;
395 parent = que_node_get_parent(parent);
397 ut_a(parent);
400 return(n_ancestors);
403 /**********************************************************************
404 Calculates the update vector node->cascade->update for a child table in
405 a cascaded update. */
406 static
407 ulint
408 row_ins_cascade_calc_update_vec(
409 /*============================*/
410 /* out: number of fields in the
411 calculated update vector; the value
412 can also be 0 if no foreign key
413 fields changed; the returned value
414 is ULINT_UNDEFINED if the column
415 type in the child table is too short
416 to fit the new value in the parent
417 table: that means the update fails */
418 upd_node_t* node, /* in: update node of the parent
419 table */
420 dict_foreign_t* foreign, /* in: foreign key constraint whose
421 type is != 0 */
422 mem_heap_t* heap) /* in: memory heap to use as
423 temporary storage */
425 upd_node_t* cascade = node->cascade_node;
426 dict_table_t* table = foreign->foreign_table;
427 dict_index_t* index = foreign->foreign_index;
428 upd_t* update;
429 dict_table_t* parent_table;
430 dict_index_t* parent_index;
431 upd_t* parent_update;
432 ulint n_fields_updated;
433 ulint parent_field_no;
434 ulint i;
435 ulint j;
437 ut_a(node);
438 ut_a(foreign);
439 ut_a(cascade);
440 ut_a(table);
441 ut_a(index);
443 /* Calculate the appropriate update vector which will set the fields
444 in the child index record to the same value (possibly padded with
445 spaces if the column is a fixed length CHAR or FIXBINARY column) as
446 the referenced index record will get in the update. */
448 parent_table = node->table;
449 ut_a(parent_table == foreign->referenced_table);
450 parent_index = foreign->referenced_index;
451 parent_update = node->update;
453 update = cascade->update;
455 update->info_bits = 0;
456 update->n_fields = foreign->n_fields;
458 n_fields_updated = 0;
460 for (i = 0; i < foreign->n_fields; i++) {
462 parent_field_no = dict_table_get_nth_col_pos(
463 parent_table,
464 dict_index_get_nth_col_no(parent_index, i));
466 for (j = 0; j < parent_update->n_fields; j++) {
467 const upd_field_t* parent_ufield
468 = &parent_update->fields[j];
470 if (parent_ufield->field_no == parent_field_no) {
472 ulint min_size;
473 const dict_col_t* col;
474 upd_field_t* ufield;
476 col = dict_index_get_nth_col(index, i);
478 /* A field in the parent index record is
479 updated. Let us make the update vector
480 field for the child table. */
482 ufield = update->fields + n_fields_updated;
484 ufield->field_no
485 = dict_table_get_nth_col_pos(
486 table, dict_col_get_no(col));
487 ufield->exp = NULL;
489 ufield->new_val = parent_ufield->new_val;
491 /* Do not allow a NOT NULL column to be
492 updated as NULL */
494 if (ufield->new_val.len == UNIV_SQL_NULL
495 && (col->prtype & DATA_NOT_NULL)) {
497 return(ULINT_UNDEFINED);
500 /* If the new value would not fit in the
501 column, do not allow the update */
503 if (ufield->new_val.len != UNIV_SQL_NULL
504 && dtype_get_at_most_n_mbchars(
505 col->prtype,
506 col->mbminlen, col->mbmaxlen,
507 col->len,
508 ufield->new_val.len,
509 ufield->new_val.data)
510 < ufield->new_val.len) {
512 return(ULINT_UNDEFINED);
515 /* If the parent column type has a different
516 length than the child column type, we may
517 need to pad with spaces the new value of the
518 child column */
520 min_size = dict_col_get_min_size(col);
522 if (min_size
523 && ufield->new_val.len != UNIV_SQL_NULL
524 && ufield->new_val.len < min_size) {
526 char* pad_start;
527 const char* pad_end;
528 ufield->new_val.data = mem_heap_alloc(
529 heap, min_size);
530 pad_start = ((char*) ufield
531 ->new_val.data)
532 + ufield->new_val.len;
533 pad_end = ((char*) ufield
534 ->new_val.data)
535 + min_size;
536 ufield->new_val.len = min_size;
537 ut_memcpy(ufield->new_val.data,
538 parent_ufield->new_val.data,
539 parent_ufield->new_val.len);
541 switch (UNIV_EXPECT(col->mbminlen,1)) {
542 default:
543 ut_error;
544 case 1:
545 if (UNIV_UNLIKELY
546 (dtype_get_charset_coll(
547 col->prtype)
548 == DATA_MYSQL_BINARY_CHARSET_COLL)) {
549 /* Do not pad BINARY
550 columns. */
551 return(ULINT_UNDEFINED);
554 /* space=0x20 */
555 memset(pad_start, 0x20,
556 pad_end - pad_start);
557 break;
558 case 2:
559 /* space=0x0020 */
560 ut_a(!(ufield->new_val.len
561 % 2));
562 ut_a(!(min_size % 2));
563 do {
564 *pad_start++ = 0x00;
565 *pad_start++ = 0x20;
566 } while (pad_start < pad_end);
567 break;
571 ufield->extern_storage = FALSE;
573 n_fields_updated++;
578 update->n_fields = n_fields_updated;
580 return(n_fields_updated);
583 /*************************************************************************
584 Set detailed error message associated with foreign key errors for
585 the given transaction. */
586 static
587 void
588 row_ins_set_detailed(
589 /*=================*/
590 trx_t* trx, /* in: transaction */
591 dict_foreign_t* foreign) /* in: foreign key constraint */
593 mutex_enter(&srv_misc_tmpfile_mutex);
594 rewind(srv_misc_tmpfile);
596 if (os_file_set_eof(srv_misc_tmpfile)) {
597 ut_print_name(srv_misc_tmpfile, trx, TRUE,
598 foreign->foreign_table_name);
599 dict_print_info_on_foreign_key_in_create_format(
600 srv_misc_tmpfile, trx, foreign, FALSE);
601 trx_set_detailed_error_from_file(trx, srv_misc_tmpfile);
602 } else {
603 trx_set_detailed_error(trx, "temp file operation failed");
606 mutex_exit(&srv_misc_tmpfile_mutex);
609 /*************************************************************************
610 Reports a foreign key error associated with an update or a delete of a
611 parent table index entry. */
612 static
613 void
614 row_ins_foreign_report_err(
615 /*=======================*/
616 const char* errstr, /* in: error string from the viewpoint
617 of the parent table */
618 que_thr_t* thr, /* in: query thread whose run_node
619 is an update node */
620 dict_foreign_t* foreign, /* in: foreign key constraint */
621 rec_t* rec, /* in: a matching index record in the
622 child table */
623 dtuple_t* entry) /* in: index entry in the parent
624 table */
626 FILE* ef = dict_foreign_err_file;
627 trx_t* trx = thr_get_trx(thr);
629 row_ins_set_detailed(trx, foreign);
631 mutex_enter(&dict_foreign_err_mutex);
632 rewind(ef);
633 ut_print_timestamp(ef);
634 fputs(" Transaction:\n", ef);
635 trx_print(ef, trx, 600);
637 fputs("Foreign key constraint fails for table ", ef);
638 ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
639 fputs(":\n", ef);
640 dict_print_info_on_foreign_key_in_create_format(ef, trx, foreign,
641 TRUE);
642 putc('\n', ef);
643 fputs(errstr, ef);
644 fputs(" in parent table, in index ", ef);
645 ut_print_name(ef, trx, FALSE, foreign->referenced_index->name);
646 if (entry) {
647 fputs(" tuple:\n", ef);
648 dtuple_print(ef, entry);
650 fputs("\nBut in child table ", ef);
651 ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
652 fputs(", in index ", ef);
653 ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
654 if (rec) {
655 fputs(", there is a record:\n", ef);
656 rec_print(ef, rec, foreign->foreign_index);
657 } else {
658 fputs(", the record is not available\n", ef);
660 putc('\n', ef);
662 mutex_exit(&dict_foreign_err_mutex);
665 /*************************************************************************
666 Reports a foreign key error to dict_foreign_err_file when we are trying
667 to add an index entry to a child table. Note that the adding may be the result
668 of an update, too. */
669 static
670 void
671 row_ins_foreign_report_add_err(
672 /*===========================*/
673 trx_t* trx, /* in: transaction */
674 dict_foreign_t* foreign, /* in: foreign key constraint */
675 rec_t* rec, /* in: a record in the parent table:
676 it does not match entry because we
677 have an error! */
678 dtuple_t* entry) /* in: index entry to insert in the
679 child table */
681 FILE* ef = dict_foreign_err_file;
683 row_ins_set_detailed(trx, foreign);
685 mutex_enter(&dict_foreign_err_mutex);
686 rewind(ef);
687 ut_print_timestamp(ef);
688 fputs(" Transaction:\n", ef);
689 trx_print(ef, trx, 600);
690 fputs("Foreign key constraint fails for table ", ef);
691 ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
692 fputs(":\n", ef);
693 dict_print_info_on_foreign_key_in_create_format(ef, trx, foreign,
694 TRUE);
695 fputs("\nTrying to add in child table, in index ", ef);
696 ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
697 if (entry) {
698 fputs(" tuple:\n", ef);
699 dtuple_print(ef, entry);
701 fputs("\nBut in parent table ", ef);
702 ut_print_name(ef, trx, TRUE, foreign->referenced_table_name);
703 fputs(", in index ", ef);
704 ut_print_name(ef, trx, FALSE, foreign->referenced_index->name);
705 fputs(",\nthe closest match we can find is record:\n", ef);
706 if (rec && page_rec_is_supremum(rec)) {
707 /* If the cursor ended on a supremum record, it is better
708 to report the previous record in the error message, so that
709 the user gets a more descriptive error message. */
710 rec = page_rec_get_prev(rec);
713 if (rec) {
714 rec_print(ef, rec, foreign->referenced_index);
716 putc('\n', ef);
718 mutex_exit(&dict_foreign_err_mutex);
721 /*************************************************************************
722 Invalidate the query cache for the given table. */
723 static
724 void
725 row_ins_invalidate_query_cache(
726 /*===========================*/
727 que_thr_t* thr, /* in: query thread whose run_node
728 is an update node */
729 const char* name) /* in: table name prefixed with
730 database name and a '/' character */
732 char* buf;
733 char* ptr;
734 ulint len = strlen(name) + 1;
736 buf = mem_strdupl(name, len);
738 ptr = strchr(buf, '/');
739 ut_a(ptr);
740 *ptr = '\0';
742 /* We call a function in ha_innodb.cc */
743 #ifndef UNIV_HOTBACKUP
744 innobase_invalidate_query_cache(thr_get_trx(thr), buf, len);
745 #endif
746 mem_free(buf);
749 /*************************************************************************
750 Perform referential actions or checks when a parent row is deleted or updated
751 and the constraint had an ON DELETE or ON UPDATE condition which was not
752 RESTRICT. */
753 static
754 ulint
755 row_ins_foreign_check_on_constraint(
756 /*================================*/
757 /* out: DB_SUCCESS, DB_LOCK_WAIT,
758 or error code */
759 que_thr_t* thr, /* in: query thread whose run_node
760 is an update node */
761 dict_foreign_t* foreign, /* in: foreign key constraint whose
762 type is != 0 */
763 btr_pcur_t* pcur, /* in: cursor placed on a matching
764 index record in the child table */
765 dtuple_t* entry, /* in: index entry in the parent
766 table */
767 mtr_t* mtr) /* in: mtr holding the latch of pcur
768 page */
770 upd_node_t* node;
771 upd_node_t* cascade;
772 dict_table_t* table = foreign->foreign_table;
773 dict_index_t* index;
774 dict_index_t* clust_index;
775 dtuple_t* ref;
776 mem_heap_t* upd_vec_heap = NULL;
777 rec_t* rec;
778 rec_t* clust_rec;
779 upd_t* update;
780 ulint n_to_update;
781 ulint err;
782 ulint i;
783 trx_t* trx;
784 mem_heap_t* tmp_heap = NULL;
786 ut_a(thr);
787 ut_a(foreign);
788 ut_a(pcur);
789 ut_a(mtr);
791 trx = thr_get_trx(thr);
793 /* Since we are going to delete or update a row, we have to invalidate
794 the MySQL query cache for table. A deadlock of threads is not possible
795 here because the caller of this function does not hold any latches with
796 the sync0sync.h rank above the kernel mutex. The query cache mutex has
797 a rank just above the kernel mutex. */
799 row_ins_invalidate_query_cache(thr, table->name);
801 node = thr->run_node;
803 if (node->is_delete && 0 == (foreign->type
804 & (DICT_FOREIGN_ON_DELETE_CASCADE
805 | DICT_FOREIGN_ON_DELETE_SET_NULL))) {
807 row_ins_foreign_report_err("Trying to delete",
808 thr, foreign,
809 btr_pcur_get_rec(pcur), entry);
811 return(DB_ROW_IS_REFERENCED);
814 if (!node->is_delete && 0 == (foreign->type
815 & (DICT_FOREIGN_ON_UPDATE_CASCADE
816 | DICT_FOREIGN_ON_UPDATE_SET_NULL))) {
818 /* This is an UPDATE */
820 row_ins_foreign_report_err("Trying to update",
821 thr, foreign,
822 btr_pcur_get_rec(pcur), entry);
824 return(DB_ROW_IS_REFERENCED);
827 if (node->cascade_node == NULL) {
828 /* Extend our query graph by creating a child to current
829 update node. The child is used in the cascade or set null
830 operation. */
832 node->cascade_heap = mem_heap_create(128);
833 node->cascade_node = row_create_update_node_for_mysql(
834 table, node->cascade_heap);
835 que_node_set_parent(node->cascade_node, node);
838 /* Initialize cascade_node to do the operation we want. Note that we
839 use the SAME cascade node to do all foreign key operations of the
840 SQL DELETE: the table of the cascade node may change if there are
841 several child tables to the table where the delete is done! */
843 cascade = node->cascade_node;
845 cascade->table = table;
847 cascade->foreign = foreign;
849 if (node->is_delete
850 && (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE)) {
851 cascade->is_delete = TRUE;
852 } else {
853 cascade->is_delete = FALSE;
855 if (foreign->n_fields > cascade->update_n_fields) {
856 /* We have to make the update vector longer */
858 cascade->update = upd_create(foreign->n_fields,
859 node->cascade_heap);
860 cascade->update_n_fields = foreign->n_fields;
864 /* We do not allow cyclic cascaded updating (DELETE is allowed,
865 but not UPDATE) of the same table, as this can lead to an infinite
866 cycle. Check that we are not updating the same table which is
867 already being modified in this cascade chain. We have to check
868 this also because the modification of the indexes of a 'parent'
869 table may still be incomplete, and we must avoid seeing the indexes
870 of the parent table in an inconsistent state! */
872 if (!cascade->is_delete
873 && row_ins_cascade_ancestor_updates_table(cascade, table)) {
875 /* We do not know if this would break foreign key
876 constraints, but play safe and return an error */
878 err = DB_ROW_IS_REFERENCED;
880 row_ins_foreign_report_err(
881 "Trying an update, possibly causing a cyclic"
882 " cascaded update\n"
883 "in the child table,", thr, foreign,
884 btr_pcur_get_rec(pcur), entry);
886 goto nonstandard_exit_func;
889 if (row_ins_cascade_n_ancestors(cascade) >= 15) {
890 err = DB_ROW_IS_REFERENCED;
892 row_ins_foreign_report_err(
893 "Trying a too deep cascaded delete or update\n",
894 thr, foreign, btr_pcur_get_rec(pcur), entry);
896 goto nonstandard_exit_func;
899 index = btr_pcur_get_btr_cur(pcur)->index;
901 ut_a(index == foreign->foreign_index);
903 rec = btr_pcur_get_rec(pcur);
905 if (index->type & DICT_CLUSTERED) {
906 /* pcur is already positioned in the clustered index of
907 the child table */
909 clust_index = index;
910 clust_rec = rec;
911 } else {
912 /* We have to look for the record in the clustered index
913 in the child table */
915 clust_index = dict_table_get_first_index(table);
917 tmp_heap = mem_heap_create(256);
919 ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec,
920 tmp_heap);
921 btr_pcur_open_with_no_init(clust_index, ref,
922 PAGE_CUR_LE, BTR_SEARCH_LEAF,
923 cascade->pcur, 0, mtr);
925 clust_rec = btr_pcur_get_rec(cascade->pcur);
927 if (!page_rec_is_user_rec(clust_rec)
928 || btr_pcur_get_low_match(cascade->pcur)
929 < dict_index_get_n_unique(clust_index)) {
931 fputs("InnoDB: error in cascade of a foreign key op\n"
932 "InnoDB: ", stderr);
933 dict_index_name_print(stderr, trx, index);
935 fputs("\n"
936 "InnoDB: record ", stderr);
937 rec_print(stderr, rec, index);
938 fputs("\n"
939 "InnoDB: clustered record ", stderr);
940 rec_print(stderr, clust_rec, clust_index);
941 fputs("\n"
942 "InnoDB: Submit a detailed bug report to"
943 " http://bugs.mysql.com\n", stderr);
945 err = DB_SUCCESS;
947 goto nonstandard_exit_func;
951 /* Set an X-lock on the row to delete or update in the child table */
953 err = lock_table(0, table, LOCK_IX, thr);
955 if (err == DB_SUCCESS) {
956 /* Here it suffices to use a LOCK_REC_NOT_GAP type lock;
957 we already have a normal shared lock on the appropriate
958 gap if the search criterion was not unique */
960 err = lock_clust_rec_read_check_and_lock_alt(
961 0, clust_rec, clust_index, LOCK_X, LOCK_REC_NOT_GAP,
962 thr);
965 if (err != DB_SUCCESS) {
967 goto nonstandard_exit_func;
970 if (rec_get_deleted_flag(clust_rec, dict_table_is_comp(table))) {
971 /* This can happen if there is a circular reference of
972 rows such that cascading delete comes to delete a row
973 already in the process of being delete marked */
974 err = DB_SUCCESS;
976 goto nonstandard_exit_func;
979 if (node->is_delete
980 ? (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL)
981 : (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)) {
983 /* Build the appropriate update vector which sets
984 foreign->n_fields first fields in rec to SQL NULL */
986 update = cascade->update;
988 update->info_bits = 0;
989 update->n_fields = foreign->n_fields;
990 UNIV_MEM_INVALID(update->fields,
991 update->n_fields * sizeof *update->fields);
993 for (i = 0; i < foreign->n_fields; i++) {
994 (update->fields + i)->field_no
995 = dict_table_get_nth_col_pos(
996 table,
997 dict_index_get_nth_col_no(index, i));
998 (update->fields + i)->exp = NULL;
999 (update->fields + i)->new_val.len = UNIV_SQL_NULL;
1000 (update->fields + i)->new_val.data = NULL;
1001 (update->fields + i)->extern_storage = FALSE;
1005 if (!node->is_delete
1006 && (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)) {
1008 /* Build the appropriate update vector which sets changing
1009 foreign->n_fields first fields in rec to new values */
1011 upd_vec_heap = mem_heap_create(256);
1013 n_to_update = row_ins_cascade_calc_update_vec(node, foreign,
1014 upd_vec_heap);
1015 if (n_to_update == ULINT_UNDEFINED) {
1016 err = DB_ROW_IS_REFERENCED;
1018 row_ins_foreign_report_err(
1019 "Trying a cascaded update where the"
1020 " updated value in the child\n"
1021 "table would not fit in the length"
1022 " of the column, or the value would\n"
1023 "be NULL and the column is"
1024 " declared as not NULL in the child table,",
1025 thr, foreign, btr_pcur_get_rec(pcur), entry);
1027 goto nonstandard_exit_func;
1030 if (cascade->update->n_fields == 0) {
1032 /* The update does not change any columns referred
1033 to in this foreign key constraint: no need to do
1034 anything */
1036 err = DB_SUCCESS;
1038 goto nonstandard_exit_func;
1042 /* Store pcur position and initialize or store the cascade node
1043 pcur stored position */
1045 btr_pcur_store_position(pcur, mtr);
1047 if (index == clust_index) {
1048 btr_pcur_copy_stored_position(cascade->pcur, pcur);
1049 } else {
1050 btr_pcur_store_position(cascade->pcur, mtr);
1053 mtr_commit(mtr);
1055 ut_a(cascade->pcur->rel_pos == BTR_PCUR_ON);
1057 cascade->state = UPD_NODE_UPDATE_CLUSTERED;
1059 err = row_update_cascade_for_mysql(thr, cascade,
1060 foreign->foreign_table);
1062 if (foreign->foreign_table->n_foreign_key_checks_running == 0) {
1063 fprintf(stderr,
1064 "InnoDB: error: table %s has the counter 0"
1065 " though there is\n"
1066 "InnoDB: a FOREIGN KEY check running on it.\n",
1067 foreign->foreign_table->name);
1070 /* Release the data dictionary latch for a while, so that we do not
1071 starve other threads from doing CREATE TABLE etc. if we have a huge
1072 cascaded operation running. The counter n_foreign_key_checks_running
1073 will prevent other users from dropping or ALTERing the table when we
1074 release the latch. */
1076 row_mysql_unfreeze_data_dictionary(thr_get_trx(thr));
1078 DEBUG_SYNC_C("innodb_dml_cascade_dict_unfreeze");
1080 row_mysql_freeze_data_dictionary(thr_get_trx(thr));
1082 mtr_start(mtr);
1084 /* Restore pcur position */
1086 btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
1088 if (tmp_heap) {
1089 mem_heap_free(tmp_heap);
1092 if (upd_vec_heap) {
1093 mem_heap_free(upd_vec_heap);
1096 return(err);
1098 nonstandard_exit_func:
1099 if (tmp_heap) {
1100 mem_heap_free(tmp_heap);
1103 if (upd_vec_heap) {
1104 mem_heap_free(upd_vec_heap);
1107 btr_pcur_store_position(pcur, mtr);
1109 mtr_commit(mtr);
1110 mtr_start(mtr);
1112 btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
1114 return(err);
1117 /*************************************************************************
1118 Sets a shared lock on a record. Used in locking possible duplicate key
1119 records and also in checking foreign key constraints. */
1120 static
1121 ulint
1122 row_ins_set_shared_rec_lock(
1123 /*========================*/
1124 /* out: DB_SUCCESS, DB_SUCCESS_LOCKED_REC,
1125 or error code */
1126 ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or
1127 LOCK_REC_NOT_GAP type lock */
1128 rec_t* rec, /* in: record */
1129 dict_index_t* index, /* in: index */
1130 const ulint* offsets,/* in: rec_get_offsets(rec, index) */
1131 que_thr_t* thr) /* in: query thread */
1133 ulint err;
1135 ut_ad(rec_offs_validate(rec, index, offsets));
1137 if (index->type & DICT_CLUSTERED) {
1138 err = lock_clust_rec_read_check_and_lock(
1139 0, rec, index, offsets, LOCK_S, type, thr);
1140 } else {
1141 err = lock_sec_rec_read_check_and_lock(
1142 0, rec, index, offsets, LOCK_S, type, thr);
1145 return(err);
1148 #ifndef UNIV_HOTBACKUP
1149 /*************************************************************************
1150 Sets a exclusive lock on a record. Used in locking possible duplicate key
1151 records */
1152 static
1153 ulint
1154 row_ins_set_exclusive_rec_lock(
1155 /*===========================*/
1156 /* out: DB_SUCCESS, DB_SUCCESS_LOCKED_REC,
1157 or error code */
1158 ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or
1159 LOCK_REC_NOT_GAP type lock */
1160 rec_t* rec, /* in: record */
1161 dict_index_t* index, /* in: index */
1162 const ulint* offsets,/* in: rec_get_offsets(rec, index) */
1163 que_thr_t* thr) /* in: query thread */
1165 ulint err;
1167 ut_ad(rec_offs_validate(rec, index, offsets));
1169 if (index->type & DICT_CLUSTERED) {
1170 err = lock_clust_rec_read_check_and_lock(
1171 0, rec, index, offsets, LOCK_X, type, thr);
1172 } else {
1173 err = lock_sec_rec_read_check_and_lock(
1174 0, rec, index, offsets, LOCK_X, type, thr);
1177 return(err);
1179 #endif /* !UNIV_HOTBACKUP */
1181 /*******************************************************************
1182 Checks if foreign key constraint fails for an index entry. Sets shared locks
1183 which lock either the success or the failure of the constraint. NOTE that
1184 the caller must have a shared latch on dict_operation_lock. */
1186 ulint
1187 row_ins_check_foreign_constraint(
1188 /*=============================*/
1189 /* out: DB_SUCCESS,
1190 DB_NO_REFERENCED_ROW,
1191 or DB_ROW_IS_REFERENCED */
1192 ibool check_ref,/* in: TRUE if we want to check that
1193 the referenced table is ok, FALSE if we
1194 want to to check the foreign key table */
1195 dict_foreign_t* foreign,/* in: foreign constraint; NOTE that the
1196 tables mentioned in it must be in the
1197 dictionary cache if they exist at all */
1198 dict_table_t* table, /* in: if check_ref is TRUE, then the foreign
1199 table, else the referenced table */
1200 dtuple_t* entry, /* in: index entry for index */
1201 que_thr_t* thr) /* in: query thread */
1203 upd_node_t* upd_node;
1204 dict_table_t* check_table;
1205 dict_index_t* check_index;
1206 ulint n_fields_cmp;
1207 btr_pcur_t pcur;
1208 int cmp;
1209 ulint err;
1210 ulint i;
1211 mtr_t mtr;
1212 trx_t* trx = thr_get_trx(thr);
1213 mem_heap_t* heap = NULL;
1214 ulint offsets_[REC_OFFS_NORMAL_SIZE];
1215 ulint* offsets = offsets_;
1216 *offsets_ = (sizeof offsets_) / sizeof *offsets_;
1218 run_again:
1219 #ifdef UNIV_SYNC_DEBUG
1220 ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
1221 #endif /* UNIV_SYNC_DEBUG */
1223 err = DB_SUCCESS;
1225 if (trx->check_foreigns == FALSE) {
1226 /* The user has suppressed foreign key checks currently for
1227 this session */
1228 goto exit_func;
1231 /* If any of the foreign key fields in entry is SQL NULL, we
1232 suppress the foreign key check: this is compatible with Oracle,
1233 for example */
1235 for (i = 0; i < foreign->n_fields; i++) {
1236 if (UNIV_SQL_NULL == dfield_get_len(
1237 dtuple_get_nth_field(entry, i))) {
1239 goto exit_func;
1243 if (que_node_get_type(thr->run_node) == QUE_NODE_UPDATE) {
1244 upd_node = thr->run_node;
1246 if (!(upd_node->is_delete) && upd_node->foreign == foreign) {
1247 /* If a cascaded update is done as defined by a
1248 foreign key constraint, do not check that
1249 constraint for the child row. In ON UPDATE CASCADE
1250 the update of the parent row is only half done when
1251 we come here: if we would check the constraint here
1252 for the child row it would fail.
1254 A QUESTION remains: if in the child table there are
1255 several constraints which refer to the same parent
1256 table, we should merge all updates to the child as
1257 one update? And the updates can be contradictory!
1258 Currently we just perform the update associated
1259 with each foreign key constraint, one after
1260 another, and the user has problems predicting in
1261 which order they are performed. */
1263 goto exit_func;
1267 if (check_ref) {
1268 check_table = foreign->referenced_table;
1269 check_index = foreign->referenced_index;
1270 } else {
1271 check_table = foreign->foreign_table;
1272 check_index = foreign->foreign_index;
1275 if (check_table == NULL || check_table->ibd_file_missing) {
1276 if (check_ref) {
1277 FILE* ef = dict_foreign_err_file;
1279 row_ins_set_detailed(trx, foreign);
1281 mutex_enter(&dict_foreign_err_mutex);
1282 rewind(ef);
1283 ut_print_timestamp(ef);
1284 fputs(" Transaction:\n", ef);
1285 trx_print(ef, trx, 600);
1286 fputs("Foreign key constraint fails for table ", ef);
1287 ut_print_name(ef, trx, TRUE,
1288 foreign->foreign_table_name);
1289 fputs(":\n", ef);
1290 dict_print_info_on_foreign_key_in_create_format(
1291 ef, trx, foreign, TRUE);
1292 fputs("\nTrying to add to index ", ef);
1293 ut_print_name(ef, trx, FALSE,
1294 foreign->foreign_index->name);
1295 fputs(" tuple:\n", ef);
1296 dtuple_print(ef, entry);
1297 fputs("\nBut the parent table ", ef);
1298 ut_print_name(ef, trx, TRUE,
1299 foreign->referenced_table_name);
1300 fputs("\nor its .ibd file does"
1301 " not currently exist!\n", ef);
1302 mutex_exit(&dict_foreign_err_mutex);
1304 err = DB_NO_REFERENCED_ROW;
1307 goto exit_func;
1310 ut_a(check_table);
1311 ut_a(check_index);
1313 if (check_table != table) {
1314 /* We already have a LOCK_IX on table, but not necessarily
1315 on check_table */
1317 err = lock_table(0, check_table, LOCK_IS, thr);
1319 if (err != DB_SUCCESS) {
1321 goto do_possible_lock_wait;
1325 mtr_start(&mtr);
1327 /* Store old value on n_fields_cmp */
1329 n_fields_cmp = dtuple_get_n_fields_cmp(entry);
1331 dtuple_set_n_fields_cmp(entry, foreign->n_fields);
1333 btr_pcur_open(check_index, entry, PAGE_CUR_GE,
1334 BTR_SEARCH_LEAF, &pcur, &mtr);
1336 /* Scan index records and check if there is a matching record */
1338 do {
1339 rec_t* rec = btr_pcur_get_rec(&pcur);
1341 if (page_rec_is_infimum(rec)) {
1343 continue;
1346 offsets = rec_get_offsets(rec, check_index,
1347 offsets, ULINT_UNDEFINED, &heap);
1349 if (page_rec_is_supremum(rec)) {
1351 err = row_ins_set_shared_rec_lock(
1352 LOCK_ORDINARY, rec, check_index, offsets, thr);
1353 switch (err) {
1354 case DB_SUCCESS_LOCKED_REC:
1355 case DB_SUCCESS:
1356 continue;
1357 default:
1358 goto end_scan;
1362 cmp = cmp_dtuple_rec(entry, rec, offsets);
1364 if (cmp == 0) {
1365 if (rec_get_deleted_flag(rec,
1366 rec_offs_comp(offsets))) {
1367 err = row_ins_set_shared_rec_lock(
1368 LOCK_ORDINARY, rec, check_index,
1369 offsets, thr);
1370 switch (err) {
1371 case DB_SUCCESS_LOCKED_REC:
1372 case DB_SUCCESS:
1373 break;
1374 default:
1375 goto end_scan;
1377 } else {
1378 /* Found a matching record. Lock only
1379 a record because we can allow inserts
1380 into gaps */
1382 err = row_ins_set_shared_rec_lock(
1383 LOCK_REC_NOT_GAP, rec, check_index,
1384 offsets, thr);
1386 switch (err) {
1387 case DB_SUCCESS_LOCKED_REC:
1388 case DB_SUCCESS:
1389 break;
1390 default:
1391 goto end_scan;
1394 if (check_ref) {
1395 err = DB_SUCCESS;
1397 goto end_scan;
1398 } else if (foreign->type != 0) {
1399 /* There is an ON UPDATE or ON DELETE
1400 condition: check them in a separate
1401 function */
1403 err = row_ins_foreign_check_on_constraint(
1404 thr, foreign, &pcur, entry,
1405 &mtr);
1406 if (err != DB_SUCCESS) {
1407 /* Since reporting a plain
1408 "duplicate key" error
1409 message to the user in
1410 cases where a long CASCADE
1411 operation would lead to a
1412 duplicate key in some
1413 other table is very
1414 confusing, map duplicate
1415 key errors resulting from
1416 FK constraints to a
1417 separate error code. */
1419 if (err == DB_DUPLICATE_KEY) {
1420 err = DB_FOREIGN_DUPLICATE_KEY;
1423 goto end_scan;
1425 } else {
1426 row_ins_foreign_report_err(
1427 "Trying to delete or update",
1428 thr, foreign, rec, entry);
1430 err = DB_ROW_IS_REFERENCED;
1431 goto end_scan;
1434 } else {
1435 ut_a(cmp < 0);
1437 err = row_ins_set_shared_rec_lock(
1438 LOCK_GAP, rec, check_index, offsets, thr);
1439 switch (err) {
1440 case DB_SUCCESS_LOCKED_REC:
1441 case DB_SUCCESS:
1442 if (check_ref) {
1443 err = DB_NO_REFERENCED_ROW;
1444 row_ins_foreign_report_add_err(
1445 trx, foreign, rec, entry);
1446 } else {
1447 err = DB_SUCCESS;
1451 goto end_scan;
1453 } while (btr_pcur_move_to_next(&pcur, &mtr));
1455 if (check_ref) {
1456 row_ins_foreign_report_add_err(
1457 trx, foreign, btr_pcur_get_rec(&pcur), entry);
1458 err = DB_NO_REFERENCED_ROW;
1459 } else {
1460 err = DB_SUCCESS;
1463 end_scan:
1464 btr_pcur_close(&pcur);
1466 mtr_commit(&mtr);
1468 /* Restore old value */
1469 dtuple_set_n_fields_cmp(entry, n_fields_cmp);
1471 do_possible_lock_wait:
1472 if (err == DB_LOCK_WAIT) {
1473 trx->error_state = err;
1475 que_thr_stop_for_mysql(thr);
1477 srv_suspend_mysql_thread(thr);
1479 if (trx->error_state == DB_SUCCESS) {
1481 goto run_again;
1484 err = trx->error_state;
1487 exit_func:
1488 if (UNIV_LIKELY_NULL(heap)) {
1489 mem_heap_free(heap);
1491 return(err);
1494 /*******************************************************************
1495 Checks if foreign key constraints fail for an index entry. If index
1496 is not mentioned in any constraint, this function does nothing,
1497 Otherwise does searches to the indexes of referenced tables and
1498 sets shared locks which lock either the success or the failure of
1499 a constraint. */
1500 static
1501 ulint
1502 row_ins_check_foreign_constraints(
1503 /*==============================*/
1504 /* out: DB_SUCCESS or error code */
1505 dict_table_t* table, /* in: table */
1506 dict_index_t* index, /* in: index */
1507 dtuple_t* entry, /* in: index entry for index */
1508 que_thr_t* thr) /* in: query thread */
1510 dict_foreign_t* foreign;
1511 ulint err;
1512 trx_t* trx;
1513 ibool got_s_lock = FALSE;
1515 trx = thr_get_trx(thr);
1517 foreign = UT_LIST_GET_FIRST(table->foreign_list);
1519 while (foreign) {
1520 if (foreign->foreign_index == index) {
1522 if (foreign->referenced_table == NULL) {
1523 dict_table_get(foreign->referenced_table_name,
1524 FALSE);
1527 if (0 == trx->dict_operation_lock_mode) {
1528 got_s_lock = TRUE;
1530 row_mysql_freeze_data_dictionary(trx);
1533 if (foreign->referenced_table) {
1534 mutex_enter(&(dict_sys->mutex));
1536 (foreign->referenced_table
1537 ->n_foreign_key_checks_running)++;
1539 mutex_exit(&(dict_sys->mutex));
1542 /* NOTE that if the thread ends up waiting for a lock
1543 we will release dict_operation_lock temporarily!
1544 But the counter on the table protects the referenced
1545 table from being dropped while the check is running. */
1547 err = row_ins_check_foreign_constraint(
1548 TRUE, foreign, table, entry, thr);
1550 if (foreign->referenced_table) {
1551 mutex_enter(&(dict_sys->mutex));
1553 ut_a(foreign->referenced_table
1554 ->n_foreign_key_checks_running > 0);
1555 (foreign->referenced_table
1556 ->n_foreign_key_checks_running)--;
1558 mutex_exit(&(dict_sys->mutex));
1561 if (got_s_lock) {
1562 row_mysql_unfreeze_data_dictionary(trx);
1565 if (err != DB_SUCCESS) {
1566 return(err);
1570 foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
1573 return(DB_SUCCESS);
1576 #ifndef UNIV_HOTBACKUP
1577 /*******************************************************************
1578 Checks if a unique key violation to rec would occur at the index entry
1579 insert. */
1580 static
1581 ibool
1582 row_ins_dupl_error_with_rec(
1583 /*========================*/
1584 /* out: TRUE if error */
1585 rec_t* rec, /* in: user record; NOTE that we assume
1586 that the caller already has a record lock on
1587 the record! */
1588 dtuple_t* entry, /* in: entry to insert */
1589 dict_index_t* index, /* in: index */
1590 const ulint* offsets)/* in: rec_get_offsets(rec, index) */
1592 ulint matched_fields;
1593 ulint matched_bytes;
1594 ulint n_unique;
1595 ulint i;
1597 ut_ad(rec_offs_validate(rec, index, offsets));
1599 n_unique = dict_index_get_n_unique(index);
1601 matched_fields = 0;
1602 matched_bytes = 0;
1604 cmp_dtuple_rec_with_match(entry, rec, offsets,
1605 &matched_fields, &matched_bytes);
1607 if (matched_fields < n_unique) {
1609 return(FALSE);
1612 /* In a unique secondary index we allow equal key values if they
1613 contain SQL NULLs */
1615 if (!(index->type & DICT_CLUSTERED)) {
1617 for (i = 0; i < n_unique; i++) {
1618 if (UNIV_SQL_NULL == dfield_get_len(
1619 dtuple_get_nth_field(entry, i))) {
1621 return(FALSE);
1626 return(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
1628 #endif /* !UNIV_HOTBACKUP */
1630 /*******************************************************************
1631 Scans a unique non-clustered index at a given index entry to determine
1632 whether a uniqueness violation has occurred for the key value of the entry.
1633 Set shared locks on possible duplicate records. */
1634 static
1635 ulint
1636 row_ins_scan_sec_index_for_duplicate(
1637 /*=================================*/
1638 /* out: DB_SUCCESS, DB_DUPLICATE_KEY, or
1639 DB_LOCK_WAIT */
1640 dict_index_t* index, /* in: non-clustered unique index */
1641 dtuple_t* entry, /* in: index entry */
1642 que_thr_t* thr) /* in: query thread */
1644 #ifndef UNIV_HOTBACKUP
1645 ulint n_unique;
1646 ulint i;
1647 int cmp;
1648 ulint n_fields_cmp;
1649 btr_pcur_t pcur;
1650 ulint err = DB_SUCCESS;
1651 unsigned allow_duplicates;
1652 mtr_t mtr;
1653 mem_heap_t* heap = NULL;
1654 ulint offsets_[REC_OFFS_NORMAL_SIZE];
1655 ulint* offsets = offsets_;
1656 *offsets_ = (sizeof offsets_) / sizeof *offsets_;
1658 n_unique = dict_index_get_n_unique(index);
1660 /* If the secondary index is unique, but one of the fields in the
1661 n_unique first fields is NULL, a unique key violation cannot occur,
1662 since we define NULL != NULL in this case */
1664 for (i = 0; i < n_unique; i++) {
1665 if (UNIV_SQL_NULL == dfield_get_len(
1666 dtuple_get_nth_field(entry, i))) {
1668 return(DB_SUCCESS);
1672 mtr_start(&mtr);
1674 /* Store old value on n_fields_cmp */
1676 n_fields_cmp = dtuple_get_n_fields_cmp(entry);
1678 dtuple_set_n_fields_cmp(entry, dict_index_get_n_unique(index));
1680 btr_pcur_open(index, entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr);
1682 allow_duplicates = thr_get_trx(thr)->duplicates;
1684 /* Scan index records and check if there is a duplicate */
1686 do {
1687 rec_t* rec = btr_pcur_get_rec(&pcur);
1689 if (page_rec_is_infimum(rec)) {
1691 continue;
1694 offsets = rec_get_offsets(rec, index, offsets,
1695 ULINT_UNDEFINED, &heap);
1697 if (allow_duplicates) {
1699 /* If the SQL-query will update or replace
1700 duplicate key we will take X-lock for
1701 duplicates ( REPLACE, LOAD DATAFILE REPLACE,
1702 INSERT ON DUPLICATE KEY UPDATE). */
1704 err = row_ins_set_exclusive_rec_lock(
1705 LOCK_ORDINARY, rec, index, offsets, thr);
1706 } else {
1708 err = row_ins_set_shared_rec_lock(
1709 LOCK_ORDINARY, rec, index, offsets, thr);
1712 switch (err) {
1713 case DB_SUCCESS_LOCKED_REC:
1714 err = DB_SUCCESS;
1715 case DB_SUCCESS:
1716 break;
1717 default:
1718 goto end_scan;
1721 if (page_rec_is_supremum(rec)) {
1723 continue;
1726 cmp = cmp_dtuple_rec(entry, rec, offsets);
1728 if (cmp == 0) {
1729 if (row_ins_dupl_error_with_rec(rec, entry,
1730 index, offsets)) {
1731 err = DB_DUPLICATE_KEY;
1733 thr_get_trx(thr)->error_info = index;
1735 goto end_scan;
1737 } else {
1738 ut_a(cmp < 0);
1739 goto end_scan;
1741 } while (btr_pcur_move_to_next(&pcur, &mtr));
1743 end_scan:
1744 if (UNIV_LIKELY_NULL(heap)) {
1745 mem_heap_free(heap);
1747 mtr_commit(&mtr);
1749 /* Restore old value */
1750 dtuple_set_n_fields_cmp(entry, n_fields_cmp);
1752 return(err);
1753 #else /* UNIV_HOTBACKUP */
1754 /* This function depends on MySQL code that is not included in
1755 InnoDB Hot Backup builds. Besides, this function should never
1756 be called in InnoDB Hot Backup. */
1757 ut_error;
1758 return(DB_FAIL);
1759 #endif /* UNIV_HOTBACKUP */
1762 /*******************************************************************
1763 Checks if a unique key violation error would occur at an index entry
1764 insert. Sets shared locks on possible duplicate records. Works only
1765 for a clustered index! */
1766 static
1767 ulint
1768 row_ins_duplicate_error_in_clust(
1769 /*=============================*/
1770 /* out: DB_SUCCESS if no error,
1771 DB_DUPLICATE_KEY if error, DB_LOCK_WAIT if we
1772 have to wait for a lock on a possible
1773 duplicate record */
1774 btr_cur_t* cursor, /* in: B-tree cursor */
1775 dtuple_t* entry, /* in: entry to insert */
1776 que_thr_t* thr, /* in: query thread */
1777 mtr_t* mtr) /* in: mtr */
1779 #ifndef UNIV_HOTBACKUP
1780 ulint err;
1781 rec_t* rec;
1782 ulint n_unique;
1783 trx_t* trx = thr_get_trx(thr);
1784 mem_heap_t*heap = NULL;
1785 ulint offsets_[REC_OFFS_NORMAL_SIZE];
1786 ulint* offsets = offsets_;
1787 *offsets_ = (sizeof offsets_) / sizeof *offsets_;
1789 UT_NOT_USED(mtr);
1791 ut_a(cursor->index->type & DICT_CLUSTERED);
1792 ut_ad(cursor->index->type & DICT_UNIQUE);
1794 /* NOTE: For unique non-clustered indexes there may be any number
1795 of delete marked records with the same value for the non-clustered
1796 index key (remember multiversioning), and which differ only in
1797 the row refererence part of the index record, containing the
1798 clustered index key fields. For such a secondary index record,
1799 to avoid race condition, we must FIRST do the insertion and after
1800 that check that the uniqueness condition is not breached! */
1802 /* NOTE: A problem is that in the B-tree node pointers on an
1803 upper level may match more to the entry than the actual existing
1804 user records on the leaf level. So, even if low_match would suggest
1805 that a duplicate key violation may occur, this may not be the case. */
1807 n_unique = dict_index_get_n_unique(cursor->index);
1809 if (cursor->low_match >= n_unique) {
1811 rec = btr_cur_get_rec(cursor);
1813 if (!page_rec_is_infimum(rec)) {
1814 offsets = rec_get_offsets(rec, cursor->index, offsets,
1815 ULINT_UNDEFINED, &heap);
1817 /* We set a lock on the possible duplicate: this
1818 is needed in logical logging of MySQL to make
1819 sure that in roll-forward we get the same duplicate
1820 errors as in original execution */
1822 if (trx->duplicates) {
1824 /* If the SQL-query will update or replace
1825 duplicate key we will take X-lock for
1826 duplicates ( REPLACE, LOAD DATAFILE REPLACE,
1827 INSERT ON DUPLICATE KEY UPDATE). */
1829 err = row_ins_set_exclusive_rec_lock(
1830 LOCK_REC_NOT_GAP, rec,
1831 cursor->index, offsets, thr);
1832 } else {
1834 err = row_ins_set_shared_rec_lock(
1835 LOCK_REC_NOT_GAP, rec,
1836 cursor->index, offsets, thr);
1839 switch (err) {
1840 case DB_SUCCESS_LOCKED_REC:
1841 case DB_SUCCESS:
1842 break;
1843 default:
1844 goto func_exit;
1847 if (row_ins_dupl_error_with_rec(
1848 rec, entry, cursor->index, offsets)) {
1849 trx->error_info = cursor->index;
1850 err = DB_DUPLICATE_KEY;
1851 goto func_exit;
1856 if (cursor->up_match >= n_unique) {
1858 rec = page_rec_get_next(btr_cur_get_rec(cursor));
1860 if (!page_rec_is_supremum(rec)) {
1861 offsets = rec_get_offsets(rec, cursor->index, offsets,
1862 ULINT_UNDEFINED, &heap);
1864 if (trx->duplicates) {
1866 /* If the SQL-query will update or replace
1867 duplicate key we will take X-lock for
1868 duplicates ( REPLACE, LOAD DATAFILE REPLACE,
1869 INSERT ON DUPLICATE KEY UPDATE). */
1871 err = row_ins_set_exclusive_rec_lock(
1872 LOCK_REC_NOT_GAP, rec,
1873 cursor->index, offsets, thr);
1874 } else {
1876 err = row_ins_set_shared_rec_lock(
1877 LOCK_REC_NOT_GAP, rec,
1878 cursor->index, offsets, thr);
1881 switch (err) {
1882 case DB_SUCCESS_LOCKED_REC:
1883 case DB_SUCCESS:
1884 break;
1885 default:
1886 goto func_exit;
1889 if (row_ins_dupl_error_with_rec(
1890 rec, entry, cursor->index, offsets)) {
1891 trx->error_info = cursor->index;
1892 err = DB_DUPLICATE_KEY;
1893 goto func_exit;
1897 ut_a(!(cursor->index->type & DICT_CLUSTERED));
1898 /* This should never happen */
1901 err = DB_SUCCESS;
1902 func_exit:
1903 if (UNIV_LIKELY_NULL(heap)) {
1904 mem_heap_free(heap);
1906 return(err);
1907 #else /* UNIV_HOTBACKUP */
1908 /* This function depends on MySQL code that is not included in
1909 InnoDB Hot Backup builds. Besides, this function should never
1910 be called in InnoDB Hot Backup. */
1911 ut_error;
1912 return(DB_FAIL);
1913 #endif /* UNIV_HOTBACKUP */
1916 /*******************************************************************
1917 Checks if an index entry has long enough common prefix with an existing
1918 record so that the intended insert of the entry must be changed to a modify of
1919 the existing record. In the case of a clustered index, the prefix must be
1920 n_unique fields long, and in the case of a secondary index, all fields must be
1921 equal. */
1922 UNIV_INLINE
1923 ulint
1924 row_ins_must_modify(
1925 /*================*/
1926 /* out: 0 if no update, ROW_INS_PREV if
1927 previous should be updated; currently we
1928 do the search so that only the low_match
1929 record can match enough to the search tuple,
1930 not the next record */
1931 btr_cur_t* cursor) /* in: B-tree cursor */
1933 ulint enough_match;
1934 rec_t* rec;
1936 /* NOTE: (compare to the note in row_ins_duplicate_error) Because node
1937 pointers on upper levels of the B-tree may match more to entry than
1938 to actual user records on the leaf level, we have to check if the
1939 candidate record is actually a user record. In a clustered index
1940 node pointers contain index->n_unique first fields, and in the case
1941 of a secondary index, all fields of the index. */
1943 enough_match = dict_index_get_n_unique_in_tree(cursor->index);
1945 if (cursor->low_match >= enough_match) {
1947 rec = btr_cur_get_rec(cursor);
1949 if (!page_rec_is_infimum(rec)) {
1951 return(ROW_INS_PREV);
1955 return(0);
1958 /*******************************************************************
1959 Tries to insert an index entry to an index. If the index is clustered
1960 and a record with the same unique key is found, the other record is
1961 necessarily marked deleted by a committed transaction, or a unique key
1962 violation error occurs. The delete marked record is then updated to an
1963 existing record, and we must write an undo log record on the delete
1964 marked record. If the index is secondary, and a record with exactly the
1965 same fields is found, the other record is necessarily marked deleted.
1966 It is then unmarked. Otherwise, the entry is just inserted to the index. */
1968 ulint
1969 row_ins_index_entry_low(
1970 /*====================*/
1971 /* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL
1972 if pessimistic retry needed, or error code */
1973 ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
1974 depending on whether we wish optimistic or
1975 pessimistic descent down the index tree */
1976 dict_index_t* index, /* in: index */
1977 dtuple_t* entry, /* in: index entry to insert */
1978 ulint* ext_vec,/* in: array containing field numbers of
1979 externally stored fields in entry, or NULL */
1980 ulint n_ext_vec,/* in: number of fields in ext_vec */
1981 que_thr_t* thr) /* in: query thread */
1983 btr_cur_t cursor;
1984 ulint ignore_sec_unique = 0;
1985 ulint modify = 0; /* remove warning */
1986 rec_t* insert_rec;
1987 rec_t* rec;
1988 ulint err;
1989 ulint n_unique;
1990 big_rec_t* big_rec = NULL;
1991 mtr_t mtr;
1992 mem_heap_t* heap = NULL;
1993 ulint offsets_[REC_OFFS_NORMAL_SIZE];
1994 ulint* offsets = offsets_;
1995 *offsets_ = (sizeof offsets_) / sizeof *offsets_;
1997 log_free_check();
1999 mtr_start(&mtr);
2001 cursor.thr = thr;
2003 /* Note that we use PAGE_CUR_LE as the search mode, because then
2004 the function will return in both low_match and up_match of the
2005 cursor sensible values */
2007 if (!(thr_get_trx(thr)->check_unique_secondary)) {
2008 ignore_sec_unique = BTR_IGNORE_SEC_UNIQUE;
2011 btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
2012 mode | BTR_INSERT | ignore_sec_unique,
2013 &cursor, 0, &mtr);
2015 if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
2016 /* The insertion was made to the insert buffer already during
2017 the search: we are done */
2019 err = DB_SUCCESS;
2021 goto function_exit;
2024 #ifdef UNIV_DEBUG
2026 page_t* page = btr_cur_get_page(&cursor);
2027 rec_t* first_rec = page_rec_get_next(
2028 page_get_infimum_rec(page));
2030 if (UNIV_LIKELY(first_rec != page_get_supremum_rec(page))) {
2031 ut_a(rec_get_n_fields(first_rec, index)
2032 == dtuple_get_n_fields(entry));
2035 #endif
2037 n_unique = dict_index_get_n_unique(index);
2039 if (index->type & DICT_UNIQUE && (cursor.up_match >= n_unique
2040 || cursor.low_match >= n_unique)) {
2042 if (index->type & DICT_CLUSTERED) {
2043 /* Note that the following may return also
2044 DB_LOCK_WAIT */
2046 err = row_ins_duplicate_error_in_clust(
2047 &cursor, entry, thr, &mtr);
2048 if (err != DB_SUCCESS) {
2050 goto function_exit;
2052 } else {
2053 mtr_commit(&mtr);
2054 err = row_ins_scan_sec_index_for_duplicate(
2055 index, entry, thr);
2056 mtr_start(&mtr);
2058 if (err != DB_SUCCESS) {
2060 goto function_exit;
2063 /* We did not find a duplicate and we have now
2064 locked with s-locks the necessary records to
2065 prevent any insertion of a duplicate by another
2066 transaction. Let us now reposition the cursor and
2067 continue the insertion. */
2069 btr_cur_search_to_nth_level(index, 0, entry,
2070 PAGE_CUR_LE,
2071 mode | BTR_INSERT,
2072 &cursor, 0, &mtr);
2076 modify = row_ins_must_modify(&cursor);
2078 if (modify != 0) {
2079 /* There is already an index entry with a long enough common
2080 prefix, we must convert the insert into a modify of an
2081 existing record */
2083 if (modify == ROW_INS_NEXT) {
2084 rec = page_rec_get_next(btr_cur_get_rec(&cursor));
2086 btr_cur_position(index, rec, &cursor);
2089 if (index->type & DICT_CLUSTERED) {
2090 err = row_ins_clust_index_entry_by_modify(
2091 mode, &cursor, &big_rec, entry,
2092 ext_vec, n_ext_vec, thr, &mtr);
2093 } else {
2094 err = row_ins_sec_index_entry_by_modify(
2095 mode, &cursor, entry, thr, &mtr);
2098 } else {
2099 if (mode == BTR_MODIFY_LEAF) {
2100 err = btr_cur_optimistic_insert(
2101 0, &cursor, entry, &insert_rec, &big_rec,
2102 thr, &mtr);
2103 } else {
2104 ut_a(mode == BTR_MODIFY_TREE);
2105 if (buf_LRU_buf_pool_running_out()) {
2107 err = DB_LOCK_TABLE_FULL;
2109 goto function_exit;
2111 err = btr_cur_pessimistic_insert(
2112 0, &cursor, entry, &insert_rec, &big_rec,
2113 thr, &mtr);
2116 if (err == DB_SUCCESS) {
2117 if (ext_vec) {
2118 rec_set_field_extern_bits(insert_rec, index,
2119 ext_vec, n_ext_vec,
2120 &mtr);
2125 function_exit:
2126 mtr_commit(&mtr);
2128 if (big_rec) {
2129 rec_t* rec;
2131 DBUG_EXECUTE_IF(
2132 "row_ins_extern_checkpoint",
2133 log_make_checkpoint_at(ut_dulint_max, TRUE););
2135 mtr_start(&mtr);
2137 DEBUG_SYNC_C("before_row_ins_extern_latch");
2138 btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
2139 BTR_MODIFY_TREE, &cursor, 0, &mtr);
2140 rec = btr_cur_get_rec(&cursor);
2141 offsets = rec_get_offsets(rec, index, offsets,
2142 ULINT_UNDEFINED, &heap);
2144 DEBUG_SYNC_C("before_row_ins_upd_extern");
2145 err = btr_store_big_rec_extern_fields(index, rec,
2146 offsets, big_rec, &mtr);
2147 DEBUG_SYNC_C("after_row_ins_upd_extern");
2149 if (modify) {
2150 dtuple_big_rec_free(big_rec);
2151 } else {
2152 dtuple_convert_back_big_rec(index, entry, big_rec);
2155 mtr_commit(&mtr);
2158 if (UNIV_LIKELY_NULL(heap)) {
2159 mem_heap_free(heap);
2161 return(err);
2164 /*******************************************************************
2165 Inserts an index entry to index. Tries first optimistic, then pessimistic
2166 descent down the tree. If the entry matches enough to a delete marked record,
2167 performs the insert by updating or delete unmarking the delete marked
2168 record. */
2170 ulint
2171 row_ins_index_entry(
2172 /*================*/
2173 /* out: DB_SUCCESS, DB_LOCK_WAIT,
2174 DB_DUPLICATE_KEY, or some other error code */
2175 dict_index_t* index, /* in: index */
2176 dtuple_t* entry, /* in: index entry to insert */
2177 ulint* ext_vec,/* in: array containing field numbers of
2178 externally stored fields in entry, or NULL */
2179 ulint n_ext_vec,/* in: number of fields in ext_vec */
2180 que_thr_t* thr) /* in: query thread */
2182 ulint err;
2184 if (UT_LIST_GET_FIRST(index->table->foreign_list)) {
2185 err = row_ins_check_foreign_constraints(index->table, index,
2186 entry, thr);
2187 if (err != DB_SUCCESS) {
2189 return(err);
2193 /* Try first optimistic descent to the B-tree */
2195 err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry,
2196 ext_vec, n_ext_vec, thr);
2197 if (err != DB_FAIL) {
2198 if (index == dict_table_get_first_index(index->table)
2199 && thr_get_trx(thr)->mysql_thd != 0) {
2200 DEBUG_SYNC_C("row_ins_clust_index_entry_leaf_after");
2202 return(err);
2205 /* Try then pessimistic descent to the B-tree */
2207 err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry,
2208 ext_vec, n_ext_vec, thr);
2209 return(err);
2212 /***************************************************************
2213 Sets the values of the dtuple fields in entry from the values of appropriate
2214 columns in row. */
2215 static
2216 void
2217 row_ins_index_entry_set_vals(
2218 /*=========================*/
2219 dict_index_t* index, /* in: index */
2220 dtuple_t* entry, /* in: index entry to make */
2221 dtuple_t* row) /* in: row */
2223 dict_field_t* ind_field;
2224 dfield_t* field;
2225 dfield_t* row_field;
2226 ulint n_fields;
2227 ulint i;
2229 ut_ad(entry && row);
2231 n_fields = dtuple_get_n_fields(entry);
2233 for (i = 0; i < n_fields; i++) {
2234 field = dtuple_get_nth_field(entry, i);
2235 ind_field = dict_index_get_nth_field(index, i);
2237 row_field = dtuple_get_nth_field(row, ind_field->col->ind);
2239 /* Check column prefix indexes */
2240 if (ind_field->prefix_len > 0
2241 && dfield_get_len(row_field) != UNIV_SQL_NULL) {
2243 const dict_col_t* col
2244 = dict_field_get_col(ind_field);
2246 field->len = dtype_get_at_most_n_mbchars(
2247 col->prtype, col->mbminlen, col->mbmaxlen,
2248 ind_field->prefix_len,
2249 row_field->len, row_field->data);
2250 } else {
2251 field->len = row_field->len;
2254 field->data = row_field->data;
2258 /***************************************************************
2259 Inserts a single index entry to the table. */
2260 static
2261 ulint
2262 row_ins_index_entry_step(
2263 /*=====================*/
2264 /* out: DB_SUCCESS if operation successfully
2265 completed, else error code or DB_LOCK_WAIT */
2266 ins_node_t* node, /* in: row insert node */
2267 que_thr_t* thr) /* in: query thread */
2269 ulint err;
2271 ut_ad(dtuple_check_typed(node->row));
2273 row_ins_index_entry_set_vals(node->index, node->entry, node->row);
2275 ut_ad(dtuple_check_typed(node->entry));
2277 err = row_ins_index_entry(node->index, node->entry, NULL, 0, thr);
2279 return(err);
2282 /***************************************************************
2283 Allocates a row id for row and inits the node->index field. */
2284 UNIV_INLINE
2285 void
2286 row_ins_alloc_row_id_step(
2287 /*======================*/
2288 ins_node_t* node) /* in: row insert node */
2290 dulint row_id;
2292 ut_ad(node->state == INS_NODE_ALLOC_ROW_ID);
2294 if (dict_table_get_first_index(node->table)->type & DICT_UNIQUE) {
2296 /* No row id is stored if the clustered index is unique */
2298 return;
2301 /* Fill in row id value to row */
2303 row_id = dict_sys_get_new_row_id();
2305 dict_sys_write_row_id(node->row_id_buf, row_id);
2308 /***************************************************************
2309 Gets a row to insert from the values list. */
2310 UNIV_INLINE
2311 void
2312 row_ins_get_row_from_values(
2313 /*========================*/
2314 ins_node_t* node) /* in: row insert node */
2316 que_node_t* list_node;
2317 dfield_t* dfield;
2318 dtuple_t* row;
2319 ulint i;
2321 /* The field values are copied in the buffers of the select node and
2322 it is safe to use them until we fetch from select again: therefore
2323 we can just copy the pointers */
2325 row = node->row;
2327 i = 0;
2328 list_node = node->values_list;
2330 while (list_node) {
2331 eval_exp(list_node);
2333 dfield = dtuple_get_nth_field(row, i);
2334 dfield_copy_data(dfield, que_node_get_val(list_node));
2336 i++;
2337 list_node = que_node_get_next(list_node);
2341 /***************************************************************
2342 Gets a row to insert from the select list. */
2343 UNIV_INLINE
2344 void
2345 row_ins_get_row_from_select(
2346 /*========================*/
2347 ins_node_t* node) /* in: row insert node */
2349 que_node_t* list_node;
2350 dfield_t* dfield;
2351 dtuple_t* row;
2352 ulint i;
2354 /* The field values are copied in the buffers of the select node and
2355 it is safe to use them until we fetch from select again: therefore
2356 we can just copy the pointers */
2358 row = node->row;
2360 i = 0;
2361 list_node = node->select->select_list;
2363 while (list_node) {
2364 dfield = dtuple_get_nth_field(row, i);
2365 dfield_copy_data(dfield, que_node_get_val(list_node));
2367 i++;
2368 list_node = que_node_get_next(list_node);
2372 /***************************************************************
2373 Inserts a row to a table. */
2375 ulint
2376 row_ins(
2377 /*====*/
2378 /* out: DB_SUCCESS if operation successfully
2379 completed, else error code or DB_LOCK_WAIT */
2380 ins_node_t* node, /* in: row insert node */
2381 que_thr_t* thr) /* in: query thread */
2383 ulint err;
2385 ut_ad(node && thr);
2387 if (node->state == INS_NODE_ALLOC_ROW_ID) {
2389 row_ins_alloc_row_id_step(node);
2391 node->index = dict_table_get_first_index(node->table);
2392 node->entry = UT_LIST_GET_FIRST(node->entry_list);
2394 if (node->ins_type == INS_SEARCHED) {
2396 row_ins_get_row_from_select(node);
2398 } else if (node->ins_type == INS_VALUES) {
2400 row_ins_get_row_from_values(node);
2403 node->state = INS_NODE_INSERT_ENTRIES;
2406 ut_ad(node->state == INS_NODE_INSERT_ENTRIES);
2408 while (node->index != NULL) {
2409 err = row_ins_index_entry_step(node, thr);
2411 if (err != DB_SUCCESS) {
2413 return(err);
2416 node->index = dict_table_get_next_index(node->index);
2417 node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
2420 ut_ad(node->entry == NULL);
2422 node->state = INS_NODE_ALLOC_ROW_ID;
2424 return(DB_SUCCESS);
2427 /***************************************************************
2428 Inserts a row to a table. This is a high-level function used in SQL execution
2429 graphs. */
2431 que_thr_t*
2432 row_ins_step(
2433 /*=========*/
2434 /* out: query thread to run next or NULL */
2435 que_thr_t* thr) /* in: query thread */
2437 ins_node_t* node;
2438 que_node_t* parent;
2439 sel_node_t* sel_node;
2440 trx_t* trx;
2441 ulint err;
2443 ut_ad(thr);
2445 trx = thr_get_trx(thr);
2447 trx_start_if_not_started(trx);
2449 node = thr->run_node;
2451 ut_ad(que_node_get_type(node) == QUE_NODE_INSERT);
2453 parent = que_node_get_parent(node);
2454 sel_node = node->select;
2456 if (thr->prev_node == parent) {
2457 node->state = INS_NODE_SET_IX_LOCK;
2460 /* If this is the first time this node is executed (or when
2461 execution resumes after wait for the table IX lock), set an
2462 IX lock on the table and reset the possible select node. MySQL's
2463 partitioned table code may also call an insert within the same
2464 SQL statement AFTER it has used this table handle to do a search.
2465 This happens, for example, when a row update moves it to another
2466 partition. In that case, we have already set the IX lock on the
2467 table during the search operation, and there is no need to set
2468 it again here. But we must write trx->id to node->trx_id_buf. */
2470 trx_write_trx_id(node->trx_id_buf, trx->id);
2472 if (node->state == INS_NODE_SET_IX_LOCK) {
2474 /* It may be that the current session has not yet started
2475 its transaction, or it has been committed: */
2477 if (UT_DULINT_EQ(trx->id, node->trx_id)) {
2478 /* No need to do IX-locking */
2480 goto same_trx;
2483 err = lock_table(0, node->table, LOCK_IX, thr);
2485 if (err != DB_SUCCESS) {
2487 goto error_handling;
2490 node->trx_id = trx->id;
2491 same_trx:
2492 node->state = INS_NODE_ALLOC_ROW_ID;
2494 if (node->ins_type == INS_SEARCHED) {
2495 /* Reset the cursor */
2496 sel_node->state = SEL_NODE_OPEN;
2498 /* Fetch a row to insert */
2500 thr->run_node = sel_node;
2502 return(thr);
2506 if ((node->ins_type == INS_SEARCHED)
2507 && (sel_node->state != SEL_NODE_FETCH)) {
2509 ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
2511 /* No more rows to insert */
2512 thr->run_node = parent;
2514 return(thr);
2517 /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
2519 err = row_ins(node, thr);
2521 error_handling:
2522 trx->error_state = err;
2524 if (err != DB_SUCCESS) {
2525 /* err == DB_LOCK_WAIT or SQL error detected */
2526 return(NULL);
2529 /* DO THE TRIGGER ACTIONS HERE */
2531 if (node->ins_type == INS_SEARCHED) {
2532 /* Fetch a row to insert */
2534 thr->run_node = sel_node;
2535 } else {
2536 thr->run_node = que_node_get_parent(node);
2539 return(thr);