1 /******************************************************
6 Created 4/20/1996 Heikki Tuuri
7 *******************************************************/
9 #include "my_global.h" /* HAVE_* */
10 #include "m_string.h" /* for my_sys.h */
11 #include "my_sys.h" /* DEBUG_SYNC_C */
18 #include "dict0dict.h"
19 #include "dict0boot.h"
23 #include "mach0data.h"
29 #include "lock0lock.h"
31 #include "eval0eval.h"
32 #include "data0data.h"
36 #define ROW_INS_PREV 1
37 #define ROW_INS_NEXT 2
40 /*********************************************************************
41 This prototype is copied from /mysql/sql/ha_innodb.cc.
42 Invalidates the MySQL query cache for the table.
43 NOTE that the exact prototype of this function has to be in
44 /innobase/row/row0ins.c! */
/* Declaration of innobase_invalidate_query_cache(). The visible parameters
are a transaction (trx), a full table name buffer (full_name) and the length
of that buffer (full_name_len).
NOTE(review): source lines are missing from this extract around this
declaration (for example the return type and the closing part of two
parameter comments), so the declaration is incomplete as shown here. */
47 innobase_invalidate_query_cache(
48 /*============================*/
49 trx_t
* trx
, /* in: transaction which modifies the table */
50 char* full_name
, /* in: concatenation of database name, null
51 char '\0', table name, null char'\0';
52 NOTE that in Windows this is always
54 ulint full_name_len
); /* in: full name length where also the null
57 /*************************************************************************
58 Creates an insert node struct. */
63 /* out, own: insert node struct */
64 ulint ins_type
, /* in: INS_VALUES, ... */
65 dict_table_t
* table
, /* in: table where to insert */
66 mem_heap_t
* heap
) /* in: mem heap where created */
70 node
= mem_heap_alloc(heap
, sizeof(ins_node_t
));
72 node
->common
.type
= QUE_NODE_INSERT
;
74 node
->ins_type
= ins_type
;
76 node
->state
= INS_NODE_SET_IX_LOCK
;
83 node
->trx_id
= ut_dulint_zero
;
85 node
->entry_sys_heap
= mem_heap_create(128);
87 node
->magic_n
= INS_NODE_MAGIC_N
;
92 /***************************************************************
93 Creates an entry template for each index of a table. */
/* Builds one index entry per index of node->table. Each entry is built by
row_build_index_entry() from node->row, with node->entry_sys_heap passed as
the heap argument, and is appended to the end of node->entry_list.
NOTE(review): the local declarations and the closing braces are among the
lines missing from this extract. */
96 ins_node_create_entry_list(
97 /*=======================*/
98 ins_node_t
* node
) /* in: row insert node */
/* The heap passed to row_build_index_entry() below must already exist */
103 ut_ad(node
->entry_sys_heap
);
105 UT_LIST_INIT(node
->entry_list
);
107 index
= dict_table_get_first_index(node
->table
);
/* Visit the indexes in dictionary order, from the first index onwards, and
add the entries to the list in that same order */
109 while (index
!= NULL
) {
110 entry
= row_build_index_entry(node
->row
, index
,
111 node
->entry_sys_heap
);
112 UT_LIST_ADD_LAST(tuple_list
, node
->entry_list
, entry
);
114 index
= dict_table_get_next_index(index
);
118 /*********************************************************************
119 Adds system field buffers to a row. */
/* For each of the system columns DATA_ROW_ID, DATA_TRX_ID and DATA_ROLL_PTR
this function looks up the column in the table, finds the matching field of
the row tuple through dict_col_get_no(), allocates a buffer of the column
length from node->entry_sys_heap with mem_heap_zalloc() and points the field
at that buffer with dfield_set_data().
NOTE(review): the assignments of the locals row and table, and the other
local declarations, are among the lines missing from this extract. */
122 row_ins_alloc_sys_fields(
123 /*=====================*/
124 ins_node_t
* node
) /* in: insert node */
129 const dict_col_t
* col
;
135 heap
= node
->entry_sys_heap
;
137 ut_ad(row
&& table
&& heap
);
/* The row tuple is expected to have exactly one field per table column */
138 ut_ad(dtuple_get_n_fields(row
) == dict_table_get_n_cols(table
));
140 /* 1. Allocate buffer for row id */
142 col
= dict_table_get_sys_col(table
, DATA_ROW_ID
);
144 dfield
= dtuple_get_nth_field(row
, dict_col_get_no(col
));
146 ptr
= mem_heap_zalloc(heap
, DATA_ROW_ID_LEN
);
148 dfield_set_data(dfield
, ptr
, DATA_ROW_ID_LEN
);
/* Keep a pointer to the row id buffer in the node */
150 node
->row_id_buf
= ptr
;
152 /* 2. Allocate buffer for trx id */
154 col
= dict_table_get_sys_col(table
, DATA_TRX_ID
);
156 dfield
= dtuple_get_nth_field(row
, dict_col_get_no(col
));
157 ptr
= mem_heap_zalloc(heap
, DATA_TRX_ID_LEN
);
159 dfield_set_data(dfield
, ptr
, DATA_TRX_ID_LEN
);
/* Keep a pointer to the trx id buffer in the node */
161 node
->trx_id_buf
= ptr
;
163 /* 3. Allocate buffer for roll ptr */
/* In the lines visible here the roll ptr buffer is attached to the field
only; unlike the two buffers above it is not stored in the node */
165 col
= dict_table_get_sys_col(table
, DATA_ROLL_PTR
);
167 dfield
= dtuple_get_nth_field(row
, dict_col_get_no(col
));
168 ptr
= mem_heap_zalloc(heap
, DATA_ROLL_PTR_LEN
);
170 dfield_set_data(dfield
, ptr
, DATA_ROLL_PTR_LEN
);
173 /*************************************************************************
174 Sets a new row to insert for an INS_DIRECT node. This function is only used
175 if we have constructed the row separately, which is a rare case; this
176 function is quite slow. */
/* Re-initializes an insert node: sets node->state back to
INS_NODE_SET_IX_LOCK, empties node->entry_sys_heap, rebuilds the index entry
list and the system field buffers from that heap, and sets node->trx_id to
ut_dulint_zero.
NOTE(review): the statements that use the row parameter are not visible in
this extract (embedded lines 185-189 are missing), and the last comment of
the body has lost its terminator. */
179 ins_node_set_new_row(
180 /*=================*/
181 ins_node_t
* node
, /* in: insert node */
182 dtuple_t
* row
) /* in: new row (or first row) for the node */
184 node
->state
= INS_NODE_SET_IX_LOCK
;
/* The entry list and sys field buffers created below are allocated from
this heap, so it is emptied first */
190 mem_heap_empty(node
->entry_sys_heap
);
192 /* Create templates for index entries */
194 ins_node_create_entry_list(node
);
196 /* Allocate from entry_sys_heap buffers for sys fields */
198 row_ins_alloc_sys_fields(node
);
200 /* As we allocated a new trx id buf, the trx id should be written
203 node
->trx_id
= ut_dulint_zero
;
206 /***********************************************************************
207 Does an insert operation by updating a delete-marked existing record
208 in the index. This situation can occur if the delete-marked record is
209 kept in the index for consistent reads. */
/* Overview of the visible logic: the record under the cursor is fetched, an
update vector is built with row_upd_build_sec_rec_difference_binary() in a
heap created here with mem_heap_create(1024), and the vector is applied with
btr_cur_optimistic_update() when mode is BTR_MODIFY_LEAF. The other path
asserts mode == BTR_MODIFY_TREE, sets err to DB_LOCK_TABLE_FULL when
buf_LRU_buf_pool_running_out() is true, and calls
btr_cur_pessimistic_update() with a local dummy_big_rec. Both update calls
pass BTR_KEEP_SYS_FLAG.
NOTE(review): the else keywords, closing braces, the remaining arguments of
the pessimistic call, the release of heap and the return statement are among
the lines missing from this extract, so the exact branch nesting cannot be
confirmed from here. */
212 row_ins_sec_index_entry_by_modify(
213 /*==============================*/
214 /* out: DB_SUCCESS or error code */
215 ulint mode
, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
216 depending on whether mtr holds just a leaf
217 latch or also a tree latch */
218 btr_cur_t
* cursor
, /* in: B-tree cursor */
219 dtuple_t
* entry
, /* in: index entry to insert */
220 que_thr_t
* thr
, /* in: query thread */
221 mtr_t
* mtr
) /* in: mtr */
223 big_rec_t
* dummy_big_rec
;
229 rec
= btr_cur_get_rec(cursor
);
/* Assertions: the cursor index must not be a clustered index, and the
record under the cursor must carry the delete mark */
231 ut_ad((cursor
->index
->type
& DICT_CLUSTERED
) == 0);
232 ut_ad(rec_get_deleted_flag(rec
,
233 dict_table_is_comp(cursor
->index
->table
)));
235 /* We know that in the alphabetical ordering, entry and rec are
236 identified. But in their binary form there may be differences if
237 there are char fields in them. Therefore we have to calculate the
240 heap
= mem_heap_create(1024);
242 update
= row_upd_build_sec_rec_difference_binary(
243 cursor
->index
, entry
, rec
, thr_get_trx(thr
), heap
);
244 if (mode
== BTR_MODIFY_LEAF
) {
245 /* Try an optimistic updating of the record, keeping changes
248 err
= btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG
, cursor
,
249 update
, 0, thr
, mtr
);
250 if (err
== DB_OVERFLOW
|| err
== DB_UNDERFLOW
) {
254 ut_a(mode
== BTR_MODIFY_TREE
);
255 if (buf_LRU_buf_pool_running_out()) {
257 err
= DB_LOCK_TABLE_FULL
;
262 err
= btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG
, cursor
,
263 &dummy_big_rec
, update
,
272 /***********************************************************************
273 Does an insert operation by delete unmarking and updating a delete marked
274 existing record in the index. This situation can occur if the delete marked
275 record is kept in the index for consistent reads. */
/* Overview of the visible logic: asserts that the cursor is on a clustered
index and on a delete-marked record, builds an update vector with
row_upd_build_difference_binary() in a heap created here with
mem_heap_create(1024), and applies it with btr_cur_optimistic_update() when
mode is BTR_MODIFY_LEAF. The other path asserts mode == BTR_MODIFY_TREE, sets
err to DB_LOCK_TABLE_FULL when buf_LRU_buf_pool_running_out() is true, and
calls btr_cur_pessimistic_update() with the big_rec out parameter of the
caller. Both update calls pass 0 as their flags argument.
NOTE(review): the else keywords, closing braces, trailing call arguments,
the release of heap and the return statement are among the lines missing from
this extract, so the exact branch nesting cannot be confirmed from here. */
278 row_ins_clust_index_entry_by_modify(
279 /*================================*/
280 /* out: DB_SUCCESS, DB_FAIL, or error code */
281 ulint mode
, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
282 depending on whether mtr holds just a leaf
283 latch or also a tree latch */
284 btr_cur_t
* cursor
, /* in: B-tree cursor */
285 big_rec_t
** big_rec
,/* out: possible big rec vector of fields
286 which have to be stored externally by the
288 dtuple_t
* entry
, /* in: index entry to insert */
289 ulint
* ext_vec
,/* in: array containing field numbers of
290 externally stored fields in entry, or NULL */
291 ulint n_ext_vec
,/* in: number of fields in ext_vec */
292 que_thr_t
* thr
, /* in: query thread */
293 mtr_t
* mtr
) /* in: mtr */
300 ut_ad(cursor
->index
->type
& DICT_CLUSTERED
);
304 rec
= btr_cur_get_rec(cursor
);
306 ut_ad(rec_get_deleted_flag(rec
,
307 dict_table_is_comp(cursor
->index
->table
)));
309 heap
= mem_heap_create(1024);
311 /* Build an update vector containing all the fields to be modified;
312 NOTE that this vector may NOT contain system columns trx_id or
315 update
= row_upd_build_difference_binary(cursor
->index
, entry
, ext_vec
,
317 thr_get_trx(thr
), heap
);
318 if (mode
== BTR_MODIFY_LEAF
) {
319 /* Try optimistic updating of the record, keeping changes
322 err
= btr_cur_optimistic_update(0, cursor
, update
, 0, thr
,
324 if (err
== DB_OVERFLOW
|| err
== DB_UNDERFLOW
) {
328 ut_a(mode
== BTR_MODIFY_TREE
);
329 if (buf_LRU_buf_pool_running_out()) {
331 err
= DB_LOCK_TABLE_FULL
;
335 err
= btr_cur_pessimistic_update(0, cursor
, big_rec
, update
,
344 /*************************************************************************
345 Returns TRUE if in a cascaded update/delete an ancestor node of node
346 updates (not DELETE, but UPDATE) table. */
/* Walks up the query graph from node through que_node_get_parent() for as
long as the parent has type QUE_NODE_UPDATE. For each such ancestor the
visible test checks whether its table equals the table argument and its
is_delete flag is FALSE, that is, whether the ancestor is an UPDATE (not a
DELETE) of the same table.
NOTE(review): the assignment of upd_node, the bodies of the if and while
statements and the return statements are among the lines missing from this
extract. */
349 row_ins_cascade_ancestor_updates_table(
350 /*===================================*/
351 /* out: TRUE if an ancestor updates table */
352 que_node_t
* node
, /* in: node in a query graph */
353 dict_table_t
* table
) /* in: table */
356 upd_node_t
* upd_node
;
358 parent
= que_node_get_parent(node
);
360 while (que_node_get_type(parent
) == QUE_NODE_UPDATE
) {
364 if (upd_node
->table
== table
&& upd_node
->is_delete
== FALSE
) {
/* Step one level further up the ancestor chain */
369 parent
= que_node_get_parent(parent
);
377 /*************************************************************************
378 Returns the number of ancestor UPDATE or DELETE nodes of a
379 cascaded update/delete node. */
/* Walks up the query graph from node through que_node_get_parent() for as
long as the parent has type QUE_NODE_UPDATE. The counter n_ancestors starts
at 0.
NOTE(review): the statement that increments the counter and the return
statement are among the lines missing from this extract; presumably the
counter is incremented once per loop iteration and then returned. */
382 row_ins_cascade_n_ancestors(
383 /*========================*/
384 /* out: number of ancestors */
385 que_node_t
* node
) /* in: node in a query graph */
388 ulint n_ancestors
= 0;
390 parent
= que_node_get_parent(node
);
392 while (que_node_get_type(parent
) == QUE_NODE_UPDATE
) {
395 parent
= que_node_get_parent(parent
);
403 /**********************************************************************
404 Calculates the update vector node->cascade->update for a child table in
405 a cascaded update. */
/* Overview of the visible logic. The update vector being filled is
node->cascade_node->update; its n_fields is first set to foreign->n_fields
and finally to the number of fields actually produced. For each key position
i below foreign->n_fields the function computes parent_field_no from the
i-th column of the referenced index and scans parent_update->fields for an
entry with that field_no. On a match it uses the slot
update->fields + n_fields_updated, copies new_val from the parent update
field and then applies these checks:
- a new value of length UNIV_SQL_NULL for a column whose prtype has
  DATA_NOT_NULL set returns ULINT_UNDEFINED;
- a non-NULL value for which dtype_get_at_most_n_mbchars() yields less than
  the value length returns ULINT_UNDEFINED;
- a non-NULL value shorter than dict_col_get_min_size(col) is copied into
  newly allocated memory, its length is set to min_size and the tail between
  pad_start and pad_end is padded (with 0x20 bytes in the memset branch); one
  branch that compares against DATA_MYSQL_BINARY_CHARSET_COLL returns
  ULINT_UNDEFINED instead.
The function returns n_fields_updated.
NOTE(review): many lines of this function are missing from this extract
(local declarations, the first operand of the padding condition, the switch
case labels, the field_no assignment target, the increment of
n_fields_updated and the closing braces), and three parameter comments have
lost their terminators. The summary above covers only what is visible. */
408 row_ins_cascade_calc_update_vec(
409 /*============================*/
410 /* out: number of fields in the
411 calculated update vector; the value
412 can also be 0 if no foreign key
413 fields changed; the returned value
414 is ULINT_UNDEFINED if the column
415 type in the child table is too short
416 to fit the new value in the parent
417 table: that means the update fails */
418 upd_node_t
* node
, /* in: update node of the parent
420 dict_foreign_t
* foreign
, /* in: foreign key constraint whose
422 mem_heap_t
* heap
) /* in: memory heap to use as
425 upd_node_t
* cascade
= node
->cascade_node
;
426 dict_table_t
* table
= foreign
->foreign_table
;
427 dict_index_t
* index
= foreign
->foreign_index
;
429 dict_table_t
* parent_table
;
430 dict_index_t
* parent_index
;
431 upd_t
* parent_update
;
432 ulint n_fields_updated
;
433 ulint parent_field_no
;
443 /* Calculate the appropriate update vector which will set the fields
444 in the child index record to the same value (possibly padded with
445 spaces if the column is a fixed length CHAR or FIXBINARY column) as
446 the referenced index record will get in the update. */
448 parent_table
= node
->table
;
449 ut_a(parent_table
== foreign
->referenced_table
);
450 parent_index
= foreign
->referenced_index
;
451 parent_update
= node
->update
;
453 update
= cascade
->update
;
455 update
->info_bits
= 0;
456 update
->n_fields
= foreign
->n_fields
;
458 n_fields_updated
= 0;
460 for (i
= 0; i
< foreign
->n_fields
; i
++) {
462 parent_field_no
= dict_table_get_nth_col_pos(
464 dict_index_get_nth_col_no(parent_index
, i
));
466 for (j
= 0; j
< parent_update
->n_fields
; j
++) {
467 const upd_field_t
* parent_ufield
468 = &parent_update
->fields
[j
];
470 if (parent_ufield
->field_no
== parent_field_no
) {
473 const dict_col_t
* col
;
476 col
= dict_index_get_nth_col(index
, i
);
478 /* A field in the parent index record is
479 updated. Let us make the update vector
480 field for the child table. */
482 ufield
= update
->fields
+ n_fields_updated
;
485 = dict_table_get_nth_col_pos(
486 table
, dict_col_get_no(col
));
489 ufield
->new_val
= parent_ufield
->new_val
;
491 /* Do not allow a NOT NULL column to be
494 if (ufield
->new_val
.len
== UNIV_SQL_NULL
495 && (col
->prtype
& DATA_NOT_NULL
)) {
497 return(ULINT_UNDEFINED
);
500 /* If the new value would not fit in the
501 column, do not allow the update */
503 if (ufield
->new_val
.len
!= UNIV_SQL_NULL
504 && dtype_get_at_most_n_mbchars(
506 col
->mbminlen
, col
->mbmaxlen
,
509 ufield
->new_val
.data
)
510 < ufield
->new_val
.len
) {
512 return(ULINT_UNDEFINED
);
515 /* If the parent column type has a different
516 length than the child column type, we may
517 need to pad with spaces the new value of the
520 min_size
= dict_col_get_min_size(col
);
523 && ufield
->new_val
.len
!= UNIV_SQL_NULL
524 && ufield
->new_val
.len
< min_size
) {
528 ufield
->new_val
.data
= mem_heap_alloc(
530 pad_start
= ((char*) ufield
532 + ufield
->new_val
.len
;
533 pad_end
= ((char*) ufield
536 ufield
->new_val
.len
= min_size
;
537 ut_memcpy(ufield
->new_val
.data
,
538 parent_ufield
->new_val
.data
,
539 parent_ufield
->new_val
.len
);
541 switch (UNIV_EXPECT(col
->mbminlen
,1)) {
546 (dtype_get_charset_coll(
548 == DATA_MYSQL_BINARY_CHARSET_COLL
)) {
551 return(ULINT_UNDEFINED
);
555 memset(pad_start
, 0x20,
556 pad_end
- pad_start
);
560 ut_a(!(ufield
->new_val
.len
562 ut_a(!(min_size
% 2));
566 } while (pad_start
< pad_end
);
571 ufield
->extern_storage
= FALSE
;
578 update
->n_fields
= n_fields_updated
;
580 return(n_fields_updated
);
583 /*************************************************************************
584 Set detailed error message associated with foreign key errors for
585 the given transaction. */
/* Composes the detailed error text for a foreign key failure in the shared
file srv_misc_tmpfile and stores it in the transaction. The visible success
path prints the child table name and the constraint definition to the file
and then calls trx_set_detailed_error_from_file(); the other visible call
stores the fixed string passed to trx_set_detailed_error().
NOTE(review): the else line between the two paths and the closing braces are
among the lines missing from this extract. */
588 row_ins_set_detailed(
589 /*=================*/
590 trx_t
* trx
, /* in: transaction */
591 dict_foreign_t
* foreign
) /* in: foreign key constraint */
/* Every use of srv_misc_tmpfile below happens between this mutex_enter and
the mutex_exit at the end */
593 mutex_enter(&srv_misc_tmpfile_mutex
);
594 rewind(srv_misc_tmpfile
);
/* The file is rewound above and os_file_set_eof() is called on it before it
is written; the message is only composed when that call reports success */
596 if (os_file_set_eof(srv_misc_tmpfile
)) {
597 ut_print_name(srv_misc_tmpfile
, trx
, TRUE
,
598 foreign
->foreign_table_name
);
599 dict_print_info_on_foreign_key_in_create_format(
600 srv_misc_tmpfile
, trx
, foreign
, FALSE
);
601 trx_set_detailed_error_from_file(trx
, srv_misc_tmpfile
);
603 trx_set_detailed_error(trx
, "temp file operation failed");
606 mutex_exit(&srv_misc_tmpfile_mutex
);
609 /*************************************************************************
610 Reports a foreign key error associated with an update or a delete of a
611 parent table index entry. */
/* Reports a foreign key violation seen from the parent table side. It first
stores the detailed error in the transaction with row_ins_set_detailed() and
then, between mutex_enter and mutex_exit on dict_foreign_err_mutex, writes to
dict_foreign_err_file: a timestamp, the transaction, the child table name and
constraint definition, the parent index name and the entry tuple, the child
table and index names, and either the child record or the text saying that
the record is not available.
NOTE(review): the lines that print errstr, the condition choosing between
the record and the not-available text, and the end of the function are among
the lines missing from this extract; three parameter comments have lost
their terminators. */
614 row_ins_foreign_report_err(
615 /*=======================*/
616 const char* errstr
, /* in: error string from the viewpoint
617 of the parent table */
618 que_thr_t
* thr
, /* in: query thread whose run_node
620 dict_foreign_t
* foreign
, /* in: foreign key constraint */
621 rec_t
* rec
, /* in: a matching index record in the
623 dtuple_t
* entry
) /* in: index entry in the parent
626 FILE* ef
= dict_foreign_err_file
;
627 trx_t
* trx
= thr_get_trx(thr
);
629 row_ins_set_detailed(trx
, foreign
);
631 mutex_enter(&dict_foreign_err_mutex
);
633 ut_print_timestamp(ef
);
634 fputs(" Transaction:\n", ef
);
635 trx_print(ef
, trx
, 600);
637 fputs("Foreign key constraint fails for table ", ef
);
638 ut_print_name(ef
, trx
, TRUE
, foreign
->foreign_table_name
);
640 dict_print_info_on_foreign_key_in_create_format(ef
, trx
, foreign
,
644 fputs(" in parent table, in index ", ef
);
645 ut_print_name(ef
, trx
, FALSE
, foreign
->referenced_index
->name
);
647 fputs(" tuple:\n", ef
);
648 dtuple_print(ef
, entry
);
650 fputs("\nBut in child table ", ef
);
651 ut_print_name(ef
, trx
, TRUE
, foreign
->foreign_table_name
);
652 fputs(", in index ", ef
);
653 ut_print_name(ef
, trx
, FALSE
, foreign
->foreign_index
->name
);
655 fputs(", there is a record:\n", ef
);
656 rec_print(ef
, rec
, foreign
->foreign_index
);
658 fputs(", the record is not available\n", ef
);
662 mutex_exit(&dict_foreign_err_mutex
);
665 /*************************************************************************
666 Reports a foreign key error to dict_foreign_err_file when we are trying
667 to add an index entry to a child table. Note that the adding may be the result
668 of an update, too. */
/* Reports a foreign key violation seen from the child table side. It first
stores the detailed error in the transaction with row_ins_set_detailed() and
then, between mutex_enter and mutex_exit on dict_foreign_err_mutex, writes to
dict_foreign_err_file: a timestamp, the transaction, the child table name and
constraint definition, the child index name and the entry tuple, the parent
table and index names, and the closest parent record. When rec is non-NULL
and is the page supremum, the record before it is fetched with
page_rec_get_prev() for printing.
NOTE(review): the closing braces, any guard around the rec_print call and
the end of the function are among the lines missing from this extract; two
parameter comments have lost their terminators. */
671 row_ins_foreign_report_add_err(
672 /*===========================*/
673 trx_t
* trx
, /* in: transaction */
674 dict_foreign_t
* foreign
, /* in: foreign key constraint */
675 rec_t
* rec
, /* in: a record in the parent table:
676 it does not match entry because we
678 dtuple_t
* entry
) /* in: index entry to insert in the
681 FILE* ef
= dict_foreign_err_file
;
683 row_ins_set_detailed(trx
, foreign
);
685 mutex_enter(&dict_foreign_err_mutex
);
687 ut_print_timestamp(ef
);
688 fputs(" Transaction:\n", ef
);
689 trx_print(ef
, trx
, 600);
690 fputs("Foreign key constraint fails for table ", ef
);
691 ut_print_name(ef
, trx
, TRUE
, foreign
->foreign_table_name
);
693 dict_print_info_on_foreign_key_in_create_format(ef
, trx
, foreign
,
695 fputs("\nTrying to add in child table, in index ", ef
);
696 ut_print_name(ef
, trx
, FALSE
, foreign
->foreign_index
->name
);
698 fputs(" tuple:\n", ef
);
699 dtuple_print(ef
, entry
);
701 fputs("\nBut in parent table ", ef
);
702 ut_print_name(ef
, trx
, TRUE
, foreign
->referenced_table_name
);
703 fputs(", in index ", ef
);
704 ut_print_name(ef
, trx
, FALSE
, foreign
->referenced_index
->name
);
705 fputs(",\nthe closest match we can find is record:\n", ef
);
706 if (rec
&& page_rec_is_supremum(rec
)) {
707 /* If the cursor ended on a supremum record, it is better
708 to report the previous record in the error message, so that
709 the user gets a more descriptive error message. */
710 rec
= page_rec_get_prev(rec
);
714 rec_print(ef
, rec
, foreign
->referenced_index
);
718 mutex_exit(&dict_foreign_err_mutex
);
721 /*************************************************************************
722 Invalidate the query cache for the given table. */
/* Makes a private copy of name with mem_strdupl() (len is strlen(name) + 1),
locates the slash separator in the copy with strchr(), and passes the copy
and len to innobase_invalidate_query_cache() together with the transaction
of the query thread.
NOTE(review): the statements that use ptr after the strchr() call, the
release of buf and the matching endif line are among the lines missing from
this extract. The full_name parameter comment of
innobase_invalidate_query_cache() describes a database name and table name
separated by a null char, so presumably the slash is overwritten with a null
char here; confirm against the complete source. */
725 row_ins_invalidate_query_cache(
726 /*===========================*/
727 que_thr_t
* thr
, /* in: query thread whose run_node
729 const char* name
) /* in: table name prefixed with
730 database name and a '/' character */
734 ulint len
= strlen(name
) + 1;
736 buf
= mem_strdupl(name
, len
);
738 ptr
= strchr(buf
, '/');
742 /* We call a function in ha_innodb.cc */
743 #ifndef UNIV_HOTBACKUP
744 innobase_invalidate_query_cache(thr_get_trx(thr
), buf
, len
);
749 /*************************************************************************
750 Perform referential actions or checks when a parent row is deleted or updated
751 and the constraint had an ON DELETE or ON UPDATE condition which was not
/* Overview of the visible logic, in order:
1. The query cache is invalidated for the child table (table->name).
2. If the running node is a delete and the constraint has neither
   DICT_FOREIGN_ON_DELETE_CASCADE nor DICT_FOREIGN_ON_DELETE_SET_NULL, or it
   is an update and the constraint has neither of the corresponding
   ON_UPDATE flags, the error is reported and DB_ROW_IS_REFERENCED is
   returned.
3. node->cascade_node (and node->cascade_heap) are created on first use and
   the cascade node is pointed at this table and constraint; its update
   vector is re-created when foreign->n_fields exceeds update_n_fields.
4. A cascaded UPDATE of a table that an ancestor already updates, and a
   cascade with 15 or more ancestors, are rejected with DB_ROW_IS_REFERENCED.
5. The clustered index record of the child row is located (through a row
   reference when pcur is on a secondary index), the table is locked with
   LOCK_IX and the record with LOCK_X and LOCK_REC_NOT_GAP.
6. For the SET NULL actions every update field gets length UNIV_SQL_NULL and
   NULL data; for ON UPDATE CASCADE the vector comes from
   row_ins_cascade_calc_update_vec(), where ULINT_UNDEFINED is reported as
   DB_ROW_IS_REFERENCED.
7. The cursor positions are stored, row_update_cascade_for_mysql() runs the
   cascade node, the data dictionary is unfrozen and frozen again, and the
   pcur position is restored.
8. tmp_heap and upd_vec_heap are freed on both the normal path and the
   nonstandard_exit_func path.
NOTE(review): a large number of lines are missing from this extract (local
declarations, else keywords, closing braces, NULL guards around the
mem_heap_free calls, several err assignments and every final return), so the
nesting and some conditions cannot be confirmed from here. Several comments
have also lost their terminators. */
755 row_ins_foreign_check_on_constraint(
756 /*================================*/
757 /* out: DB_SUCCESS, DB_LOCK_WAIT,
759 que_thr_t
* thr
, /* in: query thread whose run_node
761 dict_foreign_t
* foreign
, /* in: foreign key constraint whose
763 btr_pcur_t
* pcur
, /* in: cursor placed on a matching
764 index record in the child table */
765 dtuple_t
* entry
, /* in: index entry in the parent
767 mtr_t
* mtr
) /* in: mtr holding the latch of pcur
772 dict_table_t
* table
= foreign
->foreign_table
;
774 dict_index_t
* clust_index
;
776 mem_heap_t
* upd_vec_heap
= NULL
;
784 mem_heap_t
* tmp_heap
= NULL
;
791 trx
= thr_get_trx(thr
);
793 /* Since we are going to delete or update a row, we have to invalidate
794 the MySQL query cache for table. A deadlock of threads is not possible
795 here because the caller of this function does not hold any latches with
796 the sync0sync.h rank above the kernel mutex. The query cache mutex has
797 a rank just above the kernel mutex. */
799 row_ins_invalidate_query_cache(thr
, table
->name
);
801 node
= thr
->run_node
;
803 if (node
->is_delete
&& 0 == (foreign
->type
804 & (DICT_FOREIGN_ON_DELETE_CASCADE
805 | DICT_FOREIGN_ON_DELETE_SET_NULL
))) {
807 row_ins_foreign_report_err("Trying to delete",
809 btr_pcur_get_rec(pcur
), entry
);
811 return(DB_ROW_IS_REFERENCED
);
814 if (!node
->is_delete
&& 0 == (foreign
->type
815 & (DICT_FOREIGN_ON_UPDATE_CASCADE
816 | DICT_FOREIGN_ON_UPDATE_SET_NULL
))) {
818 /* This is an UPDATE */
820 row_ins_foreign_report_err("Trying to update",
822 btr_pcur_get_rec(pcur
), entry
);
824 return(DB_ROW_IS_REFERENCED
);
827 if (node
->cascade_node
== NULL
) {
828 /* Extend our query graph by creating a child to current
829 update node. The child is used in the cascade or set null
832 node
->cascade_heap
= mem_heap_create(128);
833 node
->cascade_node
= row_create_update_node_for_mysql(
834 table
, node
->cascade_heap
);
835 que_node_set_parent(node
->cascade_node
, node
);
838 /* Initialize cascade_node to do the operation we want. Note that we
839 use the SAME cascade node to do all foreign key operations of the
840 SQL DELETE: the table of the cascade node may change if there are
841 several child tables to the table where the delete is done! */
843 cascade
= node
->cascade_node
;
845 cascade
->table
= table
;
847 cascade
->foreign
= foreign
;
850 && (foreign
->type
& DICT_FOREIGN_ON_DELETE_CASCADE
)) {
851 cascade
->is_delete
= TRUE
;
853 cascade
->is_delete
= FALSE
;
855 if (foreign
->n_fields
> cascade
->update_n_fields
) {
856 /* We have to make the update vector longer */
858 cascade
->update
= upd_create(foreign
->n_fields
,
860 cascade
->update_n_fields
= foreign
->n_fields
;
864 /* We do not allow cyclic cascaded updating (DELETE is allowed,
865 but not UPDATE) of the same table, as this can lead to an infinite
866 cycle. Check that we are not updating the same table which is
867 already being modified in this cascade chain. We have to check
868 this also because the modification of the indexes of a 'parent'
869 table may still be incomplete, and we must avoid seeing the indexes
870 of the parent table in an inconsistent state! */
872 if (!cascade
->is_delete
873 && row_ins_cascade_ancestor_updates_table(cascade
, table
)) {
875 /* We do not know if this would break foreign key
876 constraints, but play safe and return an error */
878 err
= DB_ROW_IS_REFERENCED
;
880 row_ins_foreign_report_err(
881 "Trying an update, possibly causing a cyclic"
883 "in the child table,", thr
, foreign
,
884 btr_pcur_get_rec(pcur
), entry
);
886 goto nonstandard_exit_func
;
889 if (row_ins_cascade_n_ancestors(cascade
) >= 15) {
890 err
= DB_ROW_IS_REFERENCED
;
892 row_ins_foreign_report_err(
893 "Trying a too deep cascaded delete or update\n",
894 thr
, foreign
, btr_pcur_get_rec(pcur
), entry
);
896 goto nonstandard_exit_func
;
899 index
= btr_pcur_get_btr_cur(pcur
)->index
;
901 ut_a(index
== foreign
->foreign_index
);
903 rec
= btr_pcur_get_rec(pcur
);
905 if (index
->type
& DICT_CLUSTERED
) {
906 /* pcur is already positioned in the clustered index of
912 /* We have to look for the record in the clustered index
913 in the child table */
915 clust_index
= dict_table_get_first_index(table
);
917 tmp_heap
= mem_heap_create(256);
919 ref
= row_build_row_ref(ROW_COPY_POINTERS
, index
, rec
,
921 btr_pcur_open_with_no_init(clust_index
, ref
,
922 PAGE_CUR_LE
, BTR_SEARCH_LEAF
,
923 cascade
->pcur
, 0, mtr
);
925 clust_rec
= btr_pcur_get_rec(cascade
->pcur
);
927 if (!page_rec_is_user_rec(clust_rec
)
928 || btr_pcur_get_low_match(cascade
->pcur
)
929 < dict_index_get_n_unique(clust_index
)) {
931 fputs("InnoDB: error in cascade of a foreign key op\n"
933 dict_index_name_print(stderr
, trx
, index
);
936 "InnoDB: record ", stderr
);
937 rec_print(stderr
, rec
, index
);
939 "InnoDB: clustered record ", stderr
);
940 rec_print(stderr
, clust_rec
, clust_index
);
942 "InnoDB: Submit a detailed bug report to"
943 " http://bugs.mysql.com\n", stderr
);
947 goto nonstandard_exit_func
;
951 /* Set an X-lock on the row to delete or update in the child table */
953 err
= lock_table(0, table
, LOCK_IX
, thr
);
955 if (err
== DB_SUCCESS
) {
956 /* Here it suffices to use a LOCK_REC_NOT_GAP type lock;
957 we already have a normal shared lock on the appropriate
958 gap if the search criterion was not unique */
960 err
= lock_clust_rec_read_check_and_lock_alt(
961 0, clust_rec
, clust_index
, LOCK_X
, LOCK_REC_NOT_GAP
,
965 if (err
!= DB_SUCCESS
) {
967 goto nonstandard_exit_func
;
970 if (rec_get_deleted_flag(clust_rec
, dict_table_is_comp(table
))) {
971 /* This can happen if there is a circular reference of
972 rows such that cascading delete comes to delete a row
973 already in the process of being delete marked */
976 goto nonstandard_exit_func
;
980 ? (foreign
->type
& DICT_FOREIGN_ON_DELETE_SET_NULL
)
981 : (foreign
->type
& DICT_FOREIGN_ON_UPDATE_SET_NULL
)) {
983 /* Build the appropriate update vector which sets
984 foreign->n_fields first fields in rec to SQL NULL */
986 update
= cascade
->update
;
988 update
->info_bits
= 0;
989 update
->n_fields
= foreign
->n_fields
;
990 UNIV_MEM_INVALID(update
->fields
,
991 update
->n_fields
* sizeof *update
->fields
);
993 for (i
= 0; i
< foreign
->n_fields
; i
++) {
994 (update
->fields
+ i
)->field_no
995 = dict_table_get_nth_col_pos(
997 dict_index_get_nth_col_no(index
, i
));
998 (update
->fields
+ i
)->exp
= NULL
;
999 (update
->fields
+ i
)->new_val
.len
= UNIV_SQL_NULL
;
1000 (update
->fields
+ i
)->new_val
.data
= NULL
;
1001 (update
->fields
+ i
)->extern_storage
= FALSE
;
1005 if (!node
->is_delete
1006 && (foreign
->type
& DICT_FOREIGN_ON_UPDATE_CASCADE
)) {
1008 /* Build the appropriate update vector which sets changing
1009 foreign->n_fields first fields in rec to new values */
1011 upd_vec_heap
= mem_heap_create(256);
1013 n_to_update
= row_ins_cascade_calc_update_vec(node
, foreign
,
1015 if (n_to_update
== ULINT_UNDEFINED
) {
1016 err
= DB_ROW_IS_REFERENCED
;
1018 row_ins_foreign_report_err(
1019 "Trying a cascaded update where the"
1020 " updated value in the child\n"
1021 "table would not fit in the length"
1022 " of the column, or the value would\n"
1023 "be NULL and the column is"
1024 " declared as not NULL in the child table,",
1025 thr
, foreign
, btr_pcur_get_rec(pcur
), entry
);
1027 goto nonstandard_exit_func
;
1030 if (cascade
->update
->n_fields
== 0) {
1032 /* The update does not change any columns referred
1033 to in this foreign key constraint: no need to do
1038 goto nonstandard_exit_func
;
1042 /* Store pcur position and initialize or store the cascade node
1043 pcur stored position */
1045 btr_pcur_store_position(pcur
, mtr
);
1047 if (index
== clust_index
) {
1048 btr_pcur_copy_stored_position(cascade
->pcur
, pcur
);
1050 btr_pcur_store_position(cascade
->pcur
, mtr
);
1055 ut_a(cascade
->pcur
->rel_pos
== BTR_PCUR_ON
);
1057 cascade
->state
= UPD_NODE_UPDATE_CLUSTERED
;
1059 err
= row_update_cascade_for_mysql(thr
, cascade
,
1060 foreign
->foreign_table
);
1062 if (foreign
->foreign_table
->n_foreign_key_checks_running
== 0) {
1064 "InnoDB: error: table %s has the counter 0"
1065 " though there is\n"
1066 "InnoDB: a FOREIGN KEY check running on it.\n",
1067 foreign
->foreign_table
->name
);
1070 /* Release the data dictionary latch for a while, so that we do not
1071 starve other threads from doing CREATE TABLE etc. if we have a huge
1072 cascaded operation running. The counter n_foreign_key_checks_running
1073 will prevent other users from dropping or ALTERing the table when we
1074 release the latch. */
1076 row_mysql_unfreeze_data_dictionary(thr_get_trx(thr
));
1078 DEBUG_SYNC_C("innodb_dml_cascade_dict_unfreeze");
1080 row_mysql_freeze_data_dictionary(thr_get_trx(thr
));
1084 /* Restore pcur position */
1086 btr_pcur_restore_position(BTR_SEARCH_LEAF
, pcur
, mtr
);
1089 mem_heap_free(tmp_heap
);
1093 mem_heap_free(upd_vec_heap
);
1098 nonstandard_exit_func
:
1100 mem_heap_free(tmp_heap
);
1104 mem_heap_free(upd_vec_heap
);
1107 btr_pcur_store_position(pcur
, mtr
);
1112 btr_pcur_restore_position(BTR_SEARCH_LEAF
, pcur
, mtr
);
1117 /*************************************************************************
1118 Sets a shared lock on a record. Used in locking possible duplicate key
1119 records and also in checking foreign key constraints. */
/* Asserts rec_offs_validate(rec, index, offsets) and then requests a LOCK_S
lock of the given type on rec. When index->type has DICT_CLUSTERED set the
request goes through lock_clust_rec_read_check_and_lock(); the other visible
call is lock_sec_rec_read_check_and_lock(). Both calls pass 0 as their first
argument and forward rec, index, offsets, type and thr unchanged.
NOTE(review): the declaration of err, the else line between the two calls
and the return statement are among the lines missing from this extract, and
the comment describing the return value has lost its terminator. */
1122 row_ins_set_shared_rec_lock(
1123 /*========================*/
1124 /* out: DB_SUCCESS, DB_SUCCESS_LOCKED_REC,
1126 ulint type
, /* in: LOCK_ORDINARY, LOCK_GAP, or
1127 LOCK_REC_NOT_GAP type lock */
1128 rec_t
* rec
, /* in: record */
1129 dict_index_t
* index
, /* in: index */
1130 const ulint
* offsets
,/* in: rec_get_offsets(rec, index) */
1131 que_thr_t
* thr
) /* in: query thread */
1135 ut_ad(rec_offs_validate(rec
, index
, offsets
));
1137 if (index
->type
& DICT_CLUSTERED
) {
1138 err
= lock_clust_rec_read_check_and_lock(
1139 0, rec
, index
, offsets
, LOCK_S
, type
, thr
);
1141 err
= lock_sec_rec_read_check_and_lock(
1142 0, rec
, index
, offsets
, LOCK_S
, type
, thr
);
1148 #ifndef UNIV_HOTBACKUP
1149 /*************************************************************************
1150 Sets an exclusive lock on a record. Used in locking possible duplicate key
/* Asserts rec_offs_validate(rec, index, offsets) and then requests a LOCK_X
lock of the given type on rec. When index->type has DICT_CLUSTERED set the
request goes through lock_clust_rec_read_check_and_lock(); the other visible
call is lock_sec_rec_read_check_and_lock(). Both calls pass 0 as their first
argument and forward rec, index, offsets, type and thr unchanged. Apart from
the lock mode the visible code matches row_ins_set_shared_rec_lock().
NOTE(review): the declaration of err, the else line between the two calls
and the return statement are among the lines missing from this extract, and
the comment describing the return value has lost its terminator. */
1154 row_ins_set_exclusive_rec_lock(
1155 /*===========================*/
1156 /* out: DB_SUCCESS, DB_SUCCESS_LOCKED_REC,
1158 ulint type
, /* in: LOCK_ORDINARY, LOCK_GAP, or
1159 LOCK_REC_NOT_GAP type lock */
1160 rec_t
* rec
, /* in: record */
1161 dict_index_t
* index
, /* in: index */
1162 const ulint
* offsets
,/* in: rec_get_offsets(rec, index) */
1163 que_thr_t
* thr
) /* in: query thread */
1167 ut_ad(rec_offs_validate(rec
, index
, offsets
));
1169 if (index
->type
& DICT_CLUSTERED
) {
1170 err
= lock_clust_rec_read_check_and_lock(
1171 0, rec
, index
, offsets
, LOCK_X
, type
, thr
);
1173 err
= lock_sec_rec_read_check_and_lock(
1174 0, rec
, index
, offsets
, LOCK_X
, type
, thr
);
1179 #endif /* !UNIV_HOTBACKUP */
1181 /*******************************************************************
1182 Checks if foreign key constraint fails for an index entry. Sets shared locks
1183 which lock either the success or the failure of the constraint. NOTE that
1184 the caller must have a shared latch on dict_operation_lock. */
1187 row_ins_check_foreign_constraint(
1188 /*=============================*/
1190 DB_NO_REFERENCED_ROW,
1191 or DB_ROW_IS_REFERENCED */
1192 ibool check_ref
,/* in: TRUE if we want to check that
1193 the referenced table is ok, FALSE if we
1194 want to check the foreign key table */
1195 dict_foreign_t
* foreign
,/* in: foreign constraint; NOTE that the
1196 tables mentioned in it must be in the
1197 dictionary cache if they exist at all */
1198 dict_table_t
* table
, /* in: if check_ref is TRUE, then the foreign
1199 table, else the referenced table */
1200 dtuple_t
* entry
, /* in: index entry for index */
1201 que_thr_t
* thr
) /* in: query thread */
1203 upd_node_t
* upd_node
;
1204 dict_table_t
* check_table
;
1205 dict_index_t
* check_index
;
1212 trx_t
* trx
= thr_get_trx(thr
);
1213 mem_heap_t
* heap
= NULL
;
1214 ulint offsets_
[REC_OFFS_NORMAL_SIZE
];
1215 ulint
* offsets
= offsets_
;
1216 *offsets_
= (sizeof offsets_
) / sizeof *offsets_
;
1219 #ifdef UNIV_SYNC_DEBUG
1220 ut_ad(rw_lock_own(&dict_operation_lock
, RW_LOCK_SHARED
));
1221 #endif /* UNIV_SYNC_DEBUG */
1225 if (trx
->check_foreigns
== FALSE
) {
1226 /* The user has suppressed foreign key checks currently for
1231 /* If any of the foreign key fields in entry is SQL NULL, we
1232 suppress the foreign key check: this is compatible with Oracle,
1235 for (i
= 0; i
< foreign
->n_fields
; i
++) {
1236 if (UNIV_SQL_NULL
== dfield_get_len(
1237 dtuple_get_nth_field(entry
, i
))) {
1243 if (que_node_get_type(thr
->run_node
) == QUE_NODE_UPDATE
) {
1244 upd_node
= thr
->run_node
;
1246 if (!(upd_node
->is_delete
) && upd_node
->foreign
== foreign
) {
1247 /* If a cascaded update is done as defined by a
1248 foreign key constraint, do not check that
1249 constraint for the child row. In ON UPDATE CASCADE
1250 the update of the parent row is only half done when
1251 we come here: if we would check the constraint here
1252 for the child row it would fail.
1254 A QUESTION remains: if in the child table there are
1255 several constraints which refer to the same parent
1256 table, we should merge all updates to the child as
1257 one update? And the updates can be contradictory!
1258 Currently we just perform the update associated
1259 with each foreign key constraint, one after
1260 another, and the user has problems predicting in
1261 which order they are performed. */
1268 check_table
= foreign
->referenced_table
;
1269 check_index
= foreign
->referenced_index
;
1271 check_table
= foreign
->foreign_table
;
1272 check_index
= foreign
->foreign_index
;
1275 if (check_table
== NULL
|| check_table
->ibd_file_missing
) {
1277 FILE* ef
= dict_foreign_err_file
;
1279 row_ins_set_detailed(trx
, foreign
);
1281 mutex_enter(&dict_foreign_err_mutex
);
1283 ut_print_timestamp(ef
);
1284 fputs(" Transaction:\n", ef
);
1285 trx_print(ef
, trx
, 600);
1286 fputs("Foreign key constraint fails for table ", ef
);
1287 ut_print_name(ef
, trx
, TRUE
,
1288 foreign
->foreign_table_name
);
1290 dict_print_info_on_foreign_key_in_create_format(
1291 ef
, trx
, foreign
, TRUE
);
1292 fputs("\nTrying to add to index ", ef
);
1293 ut_print_name(ef
, trx
, FALSE
,
1294 foreign
->foreign_index
->name
);
1295 fputs(" tuple:\n", ef
);
1296 dtuple_print(ef
, entry
);
1297 fputs("\nBut the parent table ", ef
);
1298 ut_print_name(ef
, trx
, TRUE
,
1299 foreign
->referenced_table_name
);
1300 fputs("\nor its .ibd file does"
1301 " not currently exist!\n", ef
);
1302 mutex_exit(&dict_foreign_err_mutex
);
1304 err
= DB_NO_REFERENCED_ROW
;
1313 if (check_table
!= table
) {
1314 /* We already have a LOCK_IX on table, but not necessarily
1317 err
= lock_table(0, check_table
, LOCK_IS
, thr
);
1319 if (err
!= DB_SUCCESS
) {
1321 goto do_possible_lock_wait
;
1327 /* Store old value of n_fields_cmp */
1329 n_fields_cmp
= dtuple_get_n_fields_cmp(entry
);
1331 dtuple_set_n_fields_cmp(entry
, foreign
->n_fields
);
1333 btr_pcur_open(check_index
, entry
, PAGE_CUR_GE
,
1334 BTR_SEARCH_LEAF
, &pcur
, &mtr
);
1336 /* Scan index records and check if there is a matching record */
1339 rec_t
* rec
= btr_pcur_get_rec(&pcur
);
1341 if (page_rec_is_infimum(rec
)) {
1346 offsets
= rec_get_offsets(rec
, check_index
,
1347 offsets
, ULINT_UNDEFINED
, &heap
);
1349 if (page_rec_is_supremum(rec
)) {
1351 err
= row_ins_set_shared_rec_lock(
1352 LOCK_ORDINARY
, rec
, check_index
, offsets
, thr
);
1354 case DB_SUCCESS_LOCKED_REC
:
1362 cmp
= cmp_dtuple_rec(entry
, rec
, offsets
);
1365 if (rec_get_deleted_flag(rec
,
1366 rec_offs_comp(offsets
))) {
1367 err
= row_ins_set_shared_rec_lock(
1368 LOCK_ORDINARY
, rec
, check_index
,
1371 case DB_SUCCESS_LOCKED_REC
:
1378 /* Found a matching record. Lock only
1379 a record because we can allow inserts
1382 err
= row_ins_set_shared_rec_lock(
1383 LOCK_REC_NOT_GAP
, rec
, check_index
,
1387 case DB_SUCCESS_LOCKED_REC
:
1398 } else if (foreign
->type
!= 0) {
1399 /* There is an ON UPDATE or ON DELETE
1400 condition: check them in a separate
1403 err
= row_ins_foreign_check_on_constraint(
1404 thr
, foreign
, &pcur
, entry
,
1406 if (err
!= DB_SUCCESS
) {
1407 /* Since reporting a plain
1408 "duplicate key" error
1409 message to the user in
1410 cases where a long CASCADE
1411 operation would lead to a
1412 duplicate key in some
1414 confusing, map duplicate
1415 key errors resulting from
1417 separate error code. */
1419 if (err
== DB_DUPLICATE_KEY
) {
1420 err
= DB_FOREIGN_DUPLICATE_KEY
;
1426 row_ins_foreign_report_err(
1427 "Trying to delete or update",
1428 thr
, foreign
, rec
, entry
);
1430 err
= DB_ROW_IS_REFERENCED
;
1437 err
= row_ins_set_shared_rec_lock(
1438 LOCK_GAP
, rec
, check_index
, offsets
, thr
);
1440 case DB_SUCCESS_LOCKED_REC
:
1443 err
= DB_NO_REFERENCED_ROW
;
1444 row_ins_foreign_report_add_err(
1445 trx
, foreign
, rec
, entry
);
1453 } while (btr_pcur_move_to_next(&pcur
, &mtr
));
1456 row_ins_foreign_report_add_err(
1457 trx
, foreign
, btr_pcur_get_rec(&pcur
), entry
);
1458 err
= DB_NO_REFERENCED_ROW
;
1464 btr_pcur_close(&pcur
);
1468 /* Restore old value */
1469 dtuple_set_n_fields_cmp(entry
, n_fields_cmp
);
1471 do_possible_lock_wait
:
1472 if (err
== DB_LOCK_WAIT
) {
1473 trx
->error_state
= err
;
1475 que_thr_stop_for_mysql(thr
);
1477 srv_suspend_mysql_thread(thr
);
1479 if (trx
->error_state
== DB_SUCCESS
) {
1484 err
= trx
->error_state
;
1488 if (UNIV_LIKELY_NULL(heap
)) {
1489 mem_heap_free(heap
);
1494 /*******************************************************************
1495 Checks if foreign key constraints fail for an index entry. If index
1496 is not mentioned in any constraint, this function does nothing.
1497 Otherwise it searches the indexes of the referenced tables and
1498 sets shared locks which lock either the success or the failure of
1502 row_ins_check_foreign_constraints(
1503 /*==============================*/
1504 /* out: DB_SUCCESS or error code */
1505 dict_table_t
* table
, /* in: table */
1506 dict_index_t
* index
, /* in: index */
1507 dtuple_t
* entry
, /* in: index entry for index */
1508 que_thr_t
* thr
) /* in: query thread */
1510 dict_foreign_t
* foreign
;
1513 ibool got_s_lock
= FALSE
;
1515 trx
= thr_get_trx(thr
);
1517 foreign
= UT_LIST_GET_FIRST(table
->foreign_list
);
1520 if (foreign
->foreign_index
== index
) {
1522 if (foreign
->referenced_table
== NULL
) {
1523 dict_table_get(foreign
->referenced_table_name
,
1527 if (0 == trx
->dict_operation_lock_mode
) {
1530 row_mysql_freeze_data_dictionary(trx
);
1533 if (foreign
->referenced_table
) {
1534 mutex_enter(&(dict_sys
->mutex
));
1536 (foreign
->referenced_table
1537 ->n_foreign_key_checks_running
)++;
1539 mutex_exit(&(dict_sys
->mutex
));
1542 /* NOTE that if the thread ends up waiting for a lock
1543 we will release dict_operation_lock temporarily!
1544 But the counter on the table protects the referenced
1545 table from being dropped while the check is running. */
1547 err
= row_ins_check_foreign_constraint(
1548 TRUE
, foreign
, table
, entry
, thr
);
1550 if (foreign
->referenced_table
) {
1551 mutex_enter(&(dict_sys
->mutex
));
1553 ut_a(foreign
->referenced_table
1554 ->n_foreign_key_checks_running
> 0);
1555 (foreign
->referenced_table
1556 ->n_foreign_key_checks_running
)--;
1558 mutex_exit(&(dict_sys
->mutex
));
1562 row_mysql_unfreeze_data_dictionary(trx
);
1565 if (err
!= DB_SUCCESS
) {
1570 foreign
= UT_LIST_GET_NEXT(foreign_list
, foreign
);
1576 #ifndef UNIV_HOTBACKUP
1577 /*******************************************************************
1578 Checks if a unique key violation to rec would occur at the index entry
1582 row_ins_dupl_error_with_rec(
1583 /*========================*/
1584 /* out: TRUE if error */
1585 rec_t
* rec
, /* in: user record; NOTE that we assume
1586 that the caller already has a record lock on
1588 dtuple_t
* entry
, /* in: entry to insert */
1589 dict_index_t
* index
, /* in: index */
1590 const ulint
* offsets
)/* in: rec_get_offsets(rec, index) */
1592 ulint matched_fields
;
1593 ulint matched_bytes
;
1597 ut_ad(rec_offs_validate(rec
, index
, offsets
));
1599 n_unique
= dict_index_get_n_unique(index
);
1604 cmp_dtuple_rec_with_match(entry
, rec
, offsets
,
1605 &matched_fields
, &matched_bytes
);
1607 if (matched_fields
< n_unique
) {
1612 /* In a unique secondary index we allow equal key values if they
1613 contain SQL NULLs */
1615 if (!(index
->type
& DICT_CLUSTERED
)) {
1617 for (i
= 0; i
< n_unique
; i
++) {
1618 if (UNIV_SQL_NULL
== dfield_get_len(
1619 dtuple_get_nth_field(entry
, i
))) {
1626 return(!rec_get_deleted_flag(rec
, rec_offs_comp(offsets
)));
1628 #endif /* !UNIV_HOTBACKUP */
1630 /*******************************************************************
1631 Scans a unique non-clustered index at a given index entry to determine
1632 whether a uniqueness violation has occurred for the key value of the entry.
1633 Set shared locks on possible duplicate records. */
1636 row_ins_scan_sec_index_for_duplicate(
1637 /*=================================*/
1638 /* out: DB_SUCCESS, DB_DUPLICATE_KEY, or
1640 dict_index_t
* index
, /* in: non-clustered unique index */
1641 dtuple_t
* entry
, /* in: index entry */
1642 que_thr_t
* thr
) /* in: query thread */
1644 #ifndef UNIV_HOTBACKUP
1650 ulint err
= DB_SUCCESS
;
1651 unsigned allow_duplicates
;
1653 mem_heap_t
* heap
= NULL
;
1654 ulint offsets_
[REC_OFFS_NORMAL_SIZE
];
1655 ulint
* offsets
= offsets_
;
1656 *offsets_
= (sizeof offsets_
) / sizeof *offsets_
;
1658 n_unique
= dict_index_get_n_unique(index
);
1660 /* If the secondary index is unique, but one of the fields in the
1661 n_unique first fields is NULL, a unique key violation cannot occur,
1662 since we define NULL != NULL in this case */
1664 for (i
= 0; i
< n_unique
; i
++) {
1665 if (UNIV_SQL_NULL
== dfield_get_len(
1666 dtuple_get_nth_field(entry
, i
))) {
1674 /* Store old value of n_fields_cmp */
1676 n_fields_cmp
= dtuple_get_n_fields_cmp(entry
);
1678 dtuple_set_n_fields_cmp(entry
, dict_index_get_n_unique(index
));
1680 btr_pcur_open(index
, entry
, PAGE_CUR_GE
, BTR_SEARCH_LEAF
, &pcur
, &mtr
);
1682 allow_duplicates
= thr_get_trx(thr
)->duplicates
;
1684 /* Scan index records and check if there is a duplicate */
1687 rec_t
* rec
= btr_pcur_get_rec(&pcur
);
1689 if (page_rec_is_infimum(rec
)) {
1694 offsets
= rec_get_offsets(rec
, index
, offsets
,
1695 ULINT_UNDEFINED
, &heap
);
1697 if (allow_duplicates
) {
1699 /* If the SQL statement will update or replace a
1700 duplicate key, we take an X-lock on the
1701 duplicates (REPLACE, LOAD DATA INFILE ... REPLACE,
1702 INSERT ... ON DUPLICATE KEY UPDATE). */
1704 err
= row_ins_set_exclusive_rec_lock(
1705 LOCK_ORDINARY
, rec
, index
, offsets
, thr
);
1708 err
= row_ins_set_shared_rec_lock(
1709 LOCK_ORDINARY
, rec
, index
, offsets
, thr
);
1713 case DB_SUCCESS_LOCKED_REC
:
1721 if (page_rec_is_supremum(rec
)) {
1726 cmp
= cmp_dtuple_rec(entry
, rec
, offsets
);
1729 if (row_ins_dupl_error_with_rec(rec
, entry
,
1731 err
= DB_DUPLICATE_KEY
;
1733 thr_get_trx(thr
)->error_info
= index
;
1741 } while (btr_pcur_move_to_next(&pcur
, &mtr
));
1744 if (UNIV_LIKELY_NULL(heap
)) {
1745 mem_heap_free(heap
);
1749 /* Restore old value */
1750 dtuple_set_n_fields_cmp(entry
, n_fields_cmp
);
1753 #else /* UNIV_HOTBACKUP */
1754 /* This function depends on MySQL code that is not included in
1755 InnoDB Hot Backup builds. Besides, this function should never
1756 be called in InnoDB Hot Backup. */
1759 #endif /* UNIV_HOTBACKUP */
1762 /*******************************************************************
1763 Checks if a unique key violation error would occur at an index entry
1764 insert. Sets shared locks on possible duplicate records. Works only
1765 for a clustered index! */
1768 row_ins_duplicate_error_in_clust(
1769 /*=============================*/
1770 /* out: DB_SUCCESS if no error,
1771 DB_DUPLICATE_KEY if error, DB_LOCK_WAIT if we
1772 have to wait for a lock on a possible
1774 btr_cur_t
* cursor
, /* in: B-tree cursor */
1775 dtuple_t
* entry
, /* in: entry to insert */
1776 que_thr_t
* thr
, /* in: query thread */
1777 mtr_t
* mtr
) /* in: mtr */
1779 #ifndef UNIV_HOTBACKUP
1783 trx_t
* trx
= thr_get_trx(thr
);
1784 mem_heap_t
*heap
= NULL
;
1785 ulint offsets_
[REC_OFFS_NORMAL_SIZE
];
1786 ulint
* offsets
= offsets_
;
1787 *offsets_
= (sizeof offsets_
) / sizeof *offsets_
;
1791 ut_a(cursor
->index
->type
& DICT_CLUSTERED
);
1792 ut_ad(cursor
->index
->type
& DICT_UNIQUE
);
1794 /* NOTE: For unique non-clustered indexes there may be any number
1795 of delete marked records with the same value for the non-clustered
1796 index key (remember multiversioning), and which differ only in
1797 the row reference part of the index record, containing the
1798 clustered index key fields. For such a secondary index record,
1799 to avoid race condition, we must FIRST do the insertion and after
1800 that check that the uniqueness condition is not breached! */
1802 /* NOTE: A problem is that in the B-tree node pointers on an
1803 upper level may match more to the entry than the actual existing
1804 user records on the leaf level. So, even if low_match would suggest
1805 that a duplicate key violation may occur, this may not be the case. */
1807 n_unique
= dict_index_get_n_unique(cursor
->index
);
1809 if (cursor
->low_match
>= n_unique
) {
1811 rec
= btr_cur_get_rec(cursor
);
1813 if (!page_rec_is_infimum(rec
)) {
1814 offsets
= rec_get_offsets(rec
, cursor
->index
, offsets
,
1815 ULINT_UNDEFINED
, &heap
);
1817 /* We set a lock on the possible duplicate: this
1818 is needed in logical logging of MySQL to make
1819 sure that in roll-forward we get the same duplicate
1820 errors as in original execution */
1822 if (trx
->duplicates
) {
1824 /* If the SQL statement will update or replace a
1825 duplicate key, we take an X-lock on the
1826 duplicates (REPLACE, LOAD DATA INFILE ... REPLACE,
1827 INSERT ... ON DUPLICATE KEY UPDATE). */
1829 err
= row_ins_set_exclusive_rec_lock(
1830 LOCK_REC_NOT_GAP
, rec
,
1831 cursor
->index
, offsets
, thr
);
1834 err
= row_ins_set_shared_rec_lock(
1835 LOCK_REC_NOT_GAP
, rec
,
1836 cursor
->index
, offsets
, thr
);
1840 case DB_SUCCESS_LOCKED_REC
:
1847 if (row_ins_dupl_error_with_rec(
1848 rec
, entry
, cursor
->index
, offsets
)) {
1849 trx
->error_info
= cursor
->index
;
1850 err
= DB_DUPLICATE_KEY
;
1856 if (cursor
->up_match
>= n_unique
) {
1858 rec
= page_rec_get_next(btr_cur_get_rec(cursor
));
1860 if (!page_rec_is_supremum(rec
)) {
1861 offsets
= rec_get_offsets(rec
, cursor
->index
, offsets
,
1862 ULINT_UNDEFINED
, &heap
);
1864 if (trx
->duplicates
) {
1866 /* If the SQL statement will update or replace a
1867 duplicate key, we take an X-lock on the
1868 duplicates (REPLACE, LOAD DATA INFILE ... REPLACE,
1869 INSERT ... ON DUPLICATE KEY UPDATE). */
1871 err
= row_ins_set_exclusive_rec_lock(
1872 LOCK_REC_NOT_GAP
, rec
,
1873 cursor
->index
, offsets
, thr
);
1876 err
= row_ins_set_shared_rec_lock(
1877 LOCK_REC_NOT_GAP
, rec
,
1878 cursor
->index
, offsets
, thr
);
1882 case DB_SUCCESS_LOCKED_REC
:
1889 if (row_ins_dupl_error_with_rec(
1890 rec
, entry
, cursor
->index
, offsets
)) {
1891 trx
->error_info
= cursor
->index
;
1892 err
= DB_DUPLICATE_KEY
;
1897 ut_a(!(cursor
->index
->type
& DICT_CLUSTERED
));
1898 /* This should never happen */
1903 if (UNIV_LIKELY_NULL(heap
)) {
1904 mem_heap_free(heap
);
1907 #else /* UNIV_HOTBACKUP */
1908 /* This function depends on MySQL code that is not included in
1909 InnoDB Hot Backup builds. Besides, this function should never
1910 be called in InnoDB Hot Backup. */
1913 #endif /* UNIV_HOTBACKUP */
1916 /*******************************************************************
1917 Checks if an index entry has long enough common prefix with an existing
1918 record so that the intended insert of the entry must be changed to a modify of
1919 the existing record. In the case of a clustered index, the prefix must be
1920 n_unique fields long, and in the case of a secondary index, all fields must be
1924 row_ins_must_modify(
1925 /*================*/
1926 /* out: 0 if no update, ROW_INS_PREV if
1927 previous should be updated; currently we
1928 do the search so that only the low_match
1929 record can match enough to the search tuple,
1930 not the next record */
1931 btr_cur_t
* cursor
) /* in: B-tree cursor */
1936 /* NOTE: (compare to the note in row_ins_duplicate_error_in_clust) Because node
1937 pointers on upper levels of the B-tree may match more to entry than
1938 to actual user records on the leaf level, we have to check if the
1939 candidate record is actually a user record. In a clustered index
1940 node pointers contain index->n_unique first fields, and in the case
1941 of a secondary index, all fields of the index. */
1943 enough_match
= dict_index_get_n_unique_in_tree(cursor
->index
);
1945 if (cursor
->low_match
>= enough_match
) {
1947 rec
= btr_cur_get_rec(cursor
);
1949 if (!page_rec_is_infimum(rec
)) {
1951 return(ROW_INS_PREV
);
1958 /*******************************************************************
1959 Tries to insert an index entry to an index. If the index is clustered
1960 and a record with the same unique key is found, the other record is
1961 necessarily marked deleted by a committed transaction, or a unique key
1962 violation error occurs. The delete marked record is then updated to an
1963 existing record, and we must write an undo log record on the delete
1964 marked record. If the index is secondary, and a record with exactly the
1965 same fields is found, the other record is necessarily marked deleted.
1966 It is then unmarked. Otherwise, the entry is just inserted to the index. */
1969 row_ins_index_entry_low(
1970 /*====================*/
1971 /* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL
1972 if pessimistic retry needed, or error code */
1973 ulint mode
, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
1974 depending on whether we wish optimistic or
1975 pessimistic descent down the index tree */
1976 dict_index_t
* index
, /* in: index */
1977 dtuple_t
* entry
, /* in: index entry to insert */
1978 ulint
* ext_vec
,/* in: array containing field numbers of
1979 externally stored fields in entry, or NULL */
1980 ulint n_ext_vec
,/* in: number of fields in ext_vec */
1981 que_thr_t
* thr
) /* in: query thread */
1984 ulint ignore_sec_unique
= 0;
1985 ulint modify
= 0; /* remove warning */
1990 big_rec_t
* big_rec
= NULL
;
1992 mem_heap_t
* heap
= NULL
;
1993 ulint offsets_
[REC_OFFS_NORMAL_SIZE
];
1994 ulint
* offsets
= offsets_
;
1995 *offsets_
= (sizeof offsets_
) / sizeof *offsets_
;
2003 /* Note that we use PAGE_CUR_LE as the search mode, because then
2004 the function will return in both low_match and up_match of the
2005 cursor sensible values */
2007 if (!(thr_get_trx(thr
)->check_unique_secondary
)) {
2008 ignore_sec_unique
= BTR_IGNORE_SEC_UNIQUE
;
2011 btr_cur_search_to_nth_level(index
, 0, entry
, PAGE_CUR_LE
,
2012 mode
| BTR_INSERT
| ignore_sec_unique
,
2015 if (cursor
.flag
== BTR_CUR_INSERT_TO_IBUF
) {
2016 /* The insertion was made to the insert buffer already during
2017 the search: we are done */
2026 page_t
* page
= btr_cur_get_page(&cursor
);
2027 rec_t
* first_rec
= page_rec_get_next(
2028 page_get_infimum_rec(page
));
2030 if (UNIV_LIKELY(first_rec
!= page_get_supremum_rec(page
))) {
2031 ut_a(rec_get_n_fields(first_rec
, index
)
2032 == dtuple_get_n_fields(entry
));
2037 n_unique
= dict_index_get_n_unique(index
);
2039 if (index
->type
& DICT_UNIQUE
&& (cursor
.up_match
>= n_unique
2040 || cursor
.low_match
>= n_unique
)) {
2042 if (index
->type
& DICT_CLUSTERED
) {
2043 /* Note that the following may return also
2046 err
= row_ins_duplicate_error_in_clust(
2047 &cursor
, entry
, thr
, &mtr
);
2048 if (err
!= DB_SUCCESS
) {
2054 err
= row_ins_scan_sec_index_for_duplicate(
2058 if (err
!= DB_SUCCESS
) {
2063 /* We did not find a duplicate and we have now
2064 locked with s-locks the necessary records to
2065 prevent any insertion of a duplicate by another
2066 transaction. Let us now reposition the cursor and
2067 continue the insertion. */
2069 btr_cur_search_to_nth_level(index
, 0, entry
,
2076 modify
= row_ins_must_modify(&cursor
);
2079 /* There is already an index entry with a long enough common
2080 prefix, we must convert the insert into a modify of an
2083 if (modify
== ROW_INS_NEXT
) {
2084 rec
= page_rec_get_next(btr_cur_get_rec(&cursor
));
2086 btr_cur_position(index
, rec
, &cursor
);
2089 if (index
->type
& DICT_CLUSTERED
) {
2090 err
= row_ins_clust_index_entry_by_modify(
2091 mode
, &cursor
, &big_rec
, entry
,
2092 ext_vec
, n_ext_vec
, thr
, &mtr
);
2094 err
= row_ins_sec_index_entry_by_modify(
2095 mode
, &cursor
, entry
, thr
, &mtr
);
2099 if (mode
== BTR_MODIFY_LEAF
) {
2100 err
= btr_cur_optimistic_insert(
2101 0, &cursor
, entry
, &insert_rec
, &big_rec
,
2104 ut_a(mode
== BTR_MODIFY_TREE
);
2105 if (buf_LRU_buf_pool_running_out()) {
2107 err
= DB_LOCK_TABLE_FULL
;
2111 err
= btr_cur_pessimistic_insert(
2112 0, &cursor
, entry
, &insert_rec
, &big_rec
,
2116 if (err
== DB_SUCCESS
) {
2118 rec_set_field_extern_bits(insert_rec
, index
,
2132 "row_ins_extern_checkpoint",
2133 log_make_checkpoint_at(ut_dulint_max
, TRUE
););
2137 DEBUG_SYNC_C("before_row_ins_extern_latch");
2138 btr_cur_search_to_nth_level(index
, 0, entry
, PAGE_CUR_LE
,
2139 BTR_MODIFY_TREE
, &cursor
, 0, &mtr
);
2140 rec
= btr_cur_get_rec(&cursor
);
2141 offsets
= rec_get_offsets(rec
, index
, offsets
,
2142 ULINT_UNDEFINED
, &heap
);
2144 DEBUG_SYNC_C("before_row_ins_upd_extern");
2145 err
= btr_store_big_rec_extern_fields(index
, rec
,
2146 offsets
, big_rec
, &mtr
);
2147 DEBUG_SYNC_C("after_row_ins_upd_extern");
2150 dtuple_big_rec_free(big_rec
);
2152 dtuple_convert_back_big_rec(index
, entry
, big_rec
);
2158 if (UNIV_LIKELY_NULL(heap
)) {
2159 mem_heap_free(heap
);
2164 /*******************************************************************
2165 Inserts an index entry to index. Tries first optimistic, then pessimistic
2166 descent down the tree. If the entry matches enough to a delete marked record,
2167 performs the insert by updating or delete unmarking the delete marked
2171 row_ins_index_entry(
2172 /*================*/
2173 /* out: DB_SUCCESS, DB_LOCK_WAIT,
2174 DB_DUPLICATE_KEY, or some other error code */
2175 dict_index_t
* index
, /* in: index */
2176 dtuple_t
* entry
, /* in: index entry to insert */
2177 ulint
* ext_vec
,/* in: array containing field numbers of
2178 externally stored fields in entry, or NULL */
2179 ulint n_ext_vec
,/* in: number of fields in ext_vec */
2180 que_thr_t
* thr
) /* in: query thread */
2184 if (UT_LIST_GET_FIRST(index
->table
->foreign_list
)) {
2185 err
= row_ins_check_foreign_constraints(index
->table
, index
,
2187 if (err
!= DB_SUCCESS
) {
2193 /* Try first optimistic descent to the B-tree */
2195 err
= row_ins_index_entry_low(BTR_MODIFY_LEAF
, index
, entry
,
2196 ext_vec
, n_ext_vec
, thr
);
2197 if (err
!= DB_FAIL
) {
2198 if (index
== dict_table_get_first_index(index
->table
)
2199 && thr_get_trx(thr
)->mysql_thd
!= 0) {
2200 DEBUG_SYNC_C("row_ins_clust_index_entry_leaf_after");
2205 /* Try then pessimistic descent to the B-tree */
2207 err
= row_ins_index_entry_low(BTR_MODIFY_TREE
, index
, entry
,
2208 ext_vec
, n_ext_vec
, thr
);
2212 /***************************************************************
2213 Sets the values of the dtuple fields in entry from the values of appropriate
2217 row_ins_index_entry_set_vals(
2218 /*=========================*/
2219 dict_index_t
* index
, /* in: index */
2220 dtuple_t
* entry
, /* in: index entry to make */
2221 dtuple_t
* row
) /* in: row */
2223 dict_field_t
* ind_field
;
2225 dfield_t
* row_field
;
2229 ut_ad(entry
&& row
);
2231 n_fields
= dtuple_get_n_fields(entry
);
2233 for (i
= 0; i
< n_fields
; i
++) {
2234 field
= dtuple_get_nth_field(entry
, i
);
2235 ind_field
= dict_index_get_nth_field(index
, i
);
2237 row_field
= dtuple_get_nth_field(row
, ind_field
->col
->ind
);
2239 /* Check column prefix indexes */
2240 if (ind_field
->prefix_len
> 0
2241 && dfield_get_len(row_field
) != UNIV_SQL_NULL
) {
2243 const dict_col_t
* col
2244 = dict_field_get_col(ind_field
);
2246 field
->len
= dtype_get_at_most_n_mbchars(
2247 col
->prtype
, col
->mbminlen
, col
->mbmaxlen
,
2248 ind_field
->prefix_len
,
2249 row_field
->len
, row_field
->data
);
2251 field
->len
= row_field
->len
;
2254 field
->data
= row_field
->data
;
2258 /***************************************************************
2259 Inserts a single index entry to the table. */
2262 row_ins_index_entry_step(
2263 /*=====================*/
2264 /* out: DB_SUCCESS if operation successfully
2265 completed, else error code or DB_LOCK_WAIT */
2266 ins_node_t
* node
, /* in: row insert node */
2267 que_thr_t
* thr
) /* in: query thread */
2271 ut_ad(dtuple_check_typed(node
->row
));
2273 row_ins_index_entry_set_vals(node
->index
, node
->entry
, node
->row
);
2275 ut_ad(dtuple_check_typed(node
->entry
));
2277 err
= row_ins_index_entry(node
->index
, node
->entry
, NULL
, 0, thr
);
2282 /***************************************************************
2283 Allocates a row id for row and inits the node->index field. */
2286 row_ins_alloc_row_id_step(
2287 /*======================*/
2288 ins_node_t
* node
) /* in: row insert node */
2292 ut_ad(node
->state
== INS_NODE_ALLOC_ROW_ID
);
2294 if (dict_table_get_first_index(node
->table
)->type
& DICT_UNIQUE
) {
2296 /* No row id is stored if the clustered index is unique */
2301 /* Fill in row id value to row */
2303 row_id
= dict_sys_get_new_row_id();
2305 dict_sys_write_row_id(node
->row_id_buf
, row_id
);
2308 /***************************************************************
2309 Gets a row to insert from the values list. */
2312 row_ins_get_row_from_values(
2313 /*========================*/
2314 ins_node_t
* node
) /* in: row insert node */
2316 que_node_t
* list_node
;
2321 /* The field values are copied in the buffers of the select node and
2322 it is safe to use them until we fetch from select again: therefore
2323 we can just copy the pointers */
2328 list_node
= node
->values_list
;
2331 eval_exp(list_node
);
2333 dfield
= dtuple_get_nth_field(row
, i
);
2334 dfield_copy_data(dfield
, que_node_get_val(list_node
));
2337 list_node
= que_node_get_next(list_node
);
2341 /***************************************************************
2342 Gets a row to insert from the select list. */
2345 row_ins_get_row_from_select(
2346 /*========================*/
2347 ins_node_t
* node
) /* in: row insert node */
2349 que_node_t
* list_node
;
2354 /* The field values are copied in the buffers of the select node and
2355 it is safe to use them until we fetch from select again: therefore
2356 we can just copy the pointers */
2361 list_node
= node
->select
->select_list
;
2364 dfield
= dtuple_get_nth_field(row
, i
);
2365 dfield_copy_data(dfield
, que_node_get_val(list_node
));
2368 list_node
= que_node_get_next(list_node
);
2372 /***************************************************************
2373 Inserts a row to a table. */
2378 /* out: DB_SUCCESS if operation successfully
2379 completed, else error code or DB_LOCK_WAIT */
2380 ins_node_t
* node
, /* in: row insert node */
2381 que_thr_t
* thr
) /* in: query thread */
2387 if (node
->state
== INS_NODE_ALLOC_ROW_ID
) {
2389 row_ins_alloc_row_id_step(node
);
2391 node
->index
= dict_table_get_first_index(node
->table
);
2392 node
->entry
= UT_LIST_GET_FIRST(node
->entry_list
);
2394 if (node
->ins_type
== INS_SEARCHED
) {
2396 row_ins_get_row_from_select(node
);
2398 } else if (node
->ins_type
== INS_VALUES
) {
2400 row_ins_get_row_from_values(node
);
2403 node
->state
= INS_NODE_INSERT_ENTRIES
;
2406 ut_ad(node
->state
== INS_NODE_INSERT_ENTRIES
);
2408 while (node
->index
!= NULL
) {
2409 err
= row_ins_index_entry_step(node
, thr
);
2411 if (err
!= DB_SUCCESS
) {
2416 node
->index
= dict_table_get_next_index(node
->index
);
2417 node
->entry
= UT_LIST_GET_NEXT(tuple_list
, node
->entry
);
2420 ut_ad(node
->entry
== NULL
);
2422 node
->state
= INS_NODE_ALLOC_ROW_ID
;
2427 /***************************************************************
2428 Inserts a row to a table. This is a high-level function used in SQL execution
2434 /* out: query thread to run next or NULL */
2435 que_thr_t
* thr
) /* in: query thread */
2439 sel_node_t
* sel_node
;
2445 trx
= thr_get_trx(thr
);
2447 trx_start_if_not_started(trx
);
2449 node
= thr
->run_node
;
2451 ut_ad(que_node_get_type(node
) == QUE_NODE_INSERT
);
2453 parent
= que_node_get_parent(node
);
2454 sel_node
= node
->select
;
2456 if (thr
->prev_node
== parent
) {
2457 node
->state
= INS_NODE_SET_IX_LOCK
;
2460 /* If this is the first time this node is executed (or when
2461 execution resumes after wait for the table IX lock), set an
2462 IX lock on the table and reset the possible select node. MySQL's
2463 partitioned table code may also call an insert within the same
2464 SQL statement AFTER it has used this table handle to do a search.
2465 This happens, for example, when a row update moves it to another
2466 partition. In that case, we have already set the IX lock on the
2467 table during the search operation, and there is no need to set
2468 it again here. But we must write trx->id to node->trx_id_buf. */
2470 trx_write_trx_id(node
->trx_id_buf
, trx
->id
);
2472 if (node
->state
== INS_NODE_SET_IX_LOCK
) {
2474 /* It may be that the current session has not yet started
2475 its transaction, or it has been committed: */
2477 if (UT_DULINT_EQ(trx
->id
, node
->trx_id
)) {
2478 /* No need to do IX-locking */
2483 err
= lock_table(0, node
->table
, LOCK_IX
, thr
);
2485 if (err
!= DB_SUCCESS
) {
2487 goto error_handling
;
2490 node
->trx_id
= trx
->id
;
2492 node
->state
= INS_NODE_ALLOC_ROW_ID
;
2494 if (node
->ins_type
== INS_SEARCHED
) {
2495 /* Reset the cursor */
2496 sel_node
->state
= SEL_NODE_OPEN
;
2498 /* Fetch a row to insert */
2500 thr
->run_node
= sel_node
;
2506 if ((node
->ins_type
== INS_SEARCHED
)
2507 && (sel_node
->state
!= SEL_NODE_FETCH
)) {
2509 ut_ad(sel_node
->state
== SEL_NODE_NO_MORE_ROWS
);
2511 /* No more rows to insert */
2512 thr
->run_node
= parent
;
2517 /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
2519 err
= row_ins(node
, thr
);
2522 trx
->error_state
= err
;
2524 if (err
!= DB_SUCCESS
) {
2525 /* err == DB_LOCK_WAIT or SQL error detected */
2529 /* DO THE TRIGGER ACTIONS HERE */
2531 if (node
->ins_type
== INS_SEARCHED
) {
2532 /* Fetch a row to insert */
2534 thr
->run_node
= sel_node
;
2536 thr
->run_node
= que_node_get_parent(node
);