MySQL 5.0.11 sources for tomato
[tomato.git] / release / src / router / mysql / storage / innodb_plugin / trx / trx0rec.c
blob4de0ed8f9b81e8a112324b12060844d0fbc08dcf
1 /*****************************************************************************
3 Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc.,
15 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
17 *****************************************************************************/
19 /**************************************************//**
20 @file trx/trx0rec.c
21 Transaction undo log record
23 Created 3/26/1996 Heikki Tuuri
24 *******************************************************/
26 #include "trx0rec.h"
28 #ifdef UNIV_NONINL
29 #include "trx0rec.ic"
30 #endif
32 #include "fsp0fsp.h"
33 #include "mach0data.h"
34 #include "trx0undo.h"
35 #include "mtr0log.h"
36 #ifndef UNIV_HOTBACKUP
37 #include "dict0dict.h"
38 #include "ut0mem.h"
39 #include "read0read.h"
40 #include "row0ext.h"
41 #include "row0upd.h"
42 #include "que0que.h"
43 #include "trx0purge.h"
44 #include "trx0rseg.h"
45 #include "row0row.h"
47 /*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
49 /**********************************************************************//**
50 Writes the mtr log entry of the inserted undo log record on the undo log
51 page. */
52 UNIV_INLINE
53 void
54 trx_undof_page_add_undo_rec_log(
55 /*============================*/
56 page_t* undo_page, /*!< in: undo log page */
57 ulint old_free, /*!< in: start offset of the inserted entry */
58 ulint new_free, /*!< in: end offset of the entry */
59 mtr_t* mtr) /*!< in: mtr */
61 byte* log_ptr;
62 const byte* log_end;
63 ulint len;
65 log_ptr = mlog_open(mtr, 11 + 13 + MLOG_BUF_MARGIN);
67 if (log_ptr == NULL) {
69 return;
72 log_end = &log_ptr[11 + 13 + MLOG_BUF_MARGIN];
73 log_ptr = mlog_write_initial_log_record_fast(
74 undo_page, MLOG_UNDO_INSERT, log_ptr, mtr);
75 len = new_free - old_free - 4;
77 mach_write_to_2(log_ptr, len);
78 log_ptr += 2;
80 if (log_ptr + len <= log_end) {
81 memcpy(log_ptr, undo_page + old_free + 2, len);
82 mlog_close(mtr, log_ptr + len);
83 } else {
84 mlog_close(mtr, log_ptr);
85 mlog_catenate_string(mtr, undo_page + old_free + 2, len);
88 #endif /* !UNIV_HOTBACKUP */
90 /***********************************************************//**
91 Parses a redo log record of adding an undo log record.
92 @return end of log record or NULL */
93 UNIV_INTERN
94 byte*
95 trx_undo_parse_add_undo_rec(
96 /*========================*/
97 byte* ptr, /*!< in: buffer */
98 byte* end_ptr,/*!< in: buffer end */
99 page_t* page) /*!< in: page or NULL */
101 ulint len;
102 byte* rec;
103 ulint first_free;
105 if (end_ptr < ptr + 2) {
107 return(NULL);
110 len = mach_read_from_2(ptr);
111 ptr += 2;
113 if (end_ptr < ptr + len) {
115 return(NULL);
118 if (page == NULL) {
120 return(ptr + len);
123 first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
124 + TRX_UNDO_PAGE_FREE);
125 rec = page + first_free;
127 mach_write_to_2(rec, first_free + 4 + len);
128 mach_write_to_2(rec + 2 + len, first_free);
130 mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
131 first_free + 4 + len);
132 ut_memcpy(rec + 2, ptr, len);
134 return(ptr + len);
137 #ifndef UNIV_HOTBACKUP
138 /**********************************************************************//**
139 Calculates the free space left for extending an undo log record.
140 @return bytes left */
141 UNIV_INLINE
142 ulint
143 trx_undo_left(
144 /*==========*/
145 const page_t* page, /*!< in: undo log page */
146 const byte* ptr) /*!< in: pointer to page */
148 /* The '- 10' is a safety margin, in case we have some small
149 calculation error below */
151 return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END);
154 /**********************************************************************//**
155 Set the next and previous pointers in the undo page for the undo record
156 that was written to ptr. Update the first free value by the number of bytes
157 written for this undo record.
158 @return offset of the inserted entry on the page if succeeded, 0 if fail */
159 static
160 ulint
161 trx_undo_page_set_next_prev_and_add(
162 /*================================*/
163 page_t* undo_page, /*!< in/out: undo log page */
164 byte* ptr, /*!< in: ptr up to where data has been
165 written on this undo page. */
166 mtr_t* mtr) /*!< in: mtr */
168 ulint first_free; /*!< offset within undo_page */
169 ulint end_of_rec; /*!< offset within undo_page */
170 byte* ptr_to_first_free;
171 /* pointer within undo_page
172 that points to the next free
173 offset value within undo_page.*/
175 ut_ad(ptr > undo_page);
176 ut_ad(ptr < undo_page + UNIV_PAGE_SIZE);
178 if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < 2)) {
180 return(0);
183 ptr_to_first_free = undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE;
185 first_free = mach_read_from_2(ptr_to_first_free);
187 /* Write offset of the previous undo log record */
188 mach_write_to_2(ptr, first_free);
189 ptr += 2;
191 end_of_rec = ptr - undo_page;
193 /* Write offset of the next undo log record */
194 mach_write_to_2(undo_page + first_free, end_of_rec);
196 /* Update the offset to first free undo record */
197 mach_write_to_2(ptr_to_first_free, end_of_rec);
199 /* Write this log entry to the UNDO log */
200 trx_undof_page_add_undo_rec_log(undo_page, first_free,
201 end_of_rec, mtr);
203 return(first_free);
206 /**********************************************************************//**
207 Reports in the undo log of an insert of a clustered index record.
208 @return offset of the inserted entry on the page if succeed, 0 if fail */
209 static
210 ulint
211 trx_undo_page_report_insert(
212 /*========================*/
213 page_t* undo_page, /*!< in: undo log page */
214 trx_t* trx, /*!< in: transaction */
215 dict_index_t* index, /*!< in: clustered index */
216 const dtuple_t* clust_entry, /*!< in: index entry which will be
217 inserted to the clustered index */
218 mtr_t* mtr) /*!< in: mtr */
220 ulint first_free;
221 byte* ptr;
222 ulint i;
224 ut_ad(dict_index_is_clust(index));
225 ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
226 + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
228 first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
229 + TRX_UNDO_PAGE_FREE);
230 ptr = undo_page + first_free;
232 ut_ad(first_free <= UNIV_PAGE_SIZE);
234 if (trx_undo_left(undo_page, ptr) < 2 + 1 + 11 + 11) {
236 /* Not enough space for writing the general parameters */
238 return(0);
241 /* Reserve 2 bytes for the pointer to the next undo log record */
242 ptr += 2;
244 /* Store first some general parameters to the undo log */
245 *ptr++ = TRX_UNDO_INSERT_REC;
246 ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
247 ptr += mach_dulint_write_much_compressed(ptr, index->table->id);
248 /*----------------------------------------*/
249 /* Store then the fields required to uniquely determine the record
250 to be inserted in the clustered index */
252 for (i = 0; i < dict_index_get_n_unique(index); i++) {
254 const dfield_t* field = dtuple_get_nth_field(clust_entry, i);
255 ulint flen = dfield_get_len(field);
257 if (trx_undo_left(undo_page, ptr) < 5) {
259 return(0);
262 ptr += mach_write_compressed(ptr, flen);
264 if (flen != UNIV_SQL_NULL) {
265 if (trx_undo_left(undo_page, ptr) < flen) {
267 return(0);
270 ut_memcpy(ptr, dfield_get_data(field), flen);
271 ptr += flen;
275 return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
278 /**********************************************************************//**
279 Reads from an undo log record the general parameters.
280 @return remaining part of undo log record after reading these values */
281 UNIV_INTERN
282 byte*
283 trx_undo_rec_get_pars(
284 /*==================*/
285 trx_undo_rec_t* undo_rec, /*!< in: undo log record */
286 ulint* type, /*!< out: undo record type:
287 TRX_UNDO_INSERT_REC, ... */
288 ulint* cmpl_info, /*!< out: compiler info, relevant only
289 for update type records */
290 ibool* updated_extern, /*!< out: TRUE if we updated an
291 externally stored fild */
292 undo_no_t* undo_no, /*!< out: undo log record number */
293 dulint* table_id) /*!< out: table id */
295 byte* ptr;
296 ulint type_cmpl;
298 ptr = undo_rec + 2;
300 type_cmpl = mach_read_from_1(ptr);
301 ptr++;
303 if (type_cmpl & TRX_UNDO_UPD_EXTERN) {
304 *updated_extern = TRUE;
305 type_cmpl -= TRX_UNDO_UPD_EXTERN;
306 } else {
307 *updated_extern = FALSE;
310 *type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
311 *cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
313 *undo_no = mach_dulint_read_much_compressed(ptr);
314 ptr += mach_dulint_get_much_compressed_size(*undo_no);
316 *table_id = mach_dulint_read_much_compressed(ptr);
317 ptr += mach_dulint_get_much_compressed_size(*table_id);
319 return(ptr);
322 /**********************************************************************//**
323 Reads from an undo log record a stored column value.
324 @return remaining part of undo log record after reading these values */
325 static
326 byte*
327 trx_undo_rec_get_col_val(
328 /*=====================*/
329 byte* ptr, /*!< in: pointer to remaining part of undo log record */
330 byte** field, /*!< out: pointer to stored field */
331 ulint* len, /*!< out: length of the field, or UNIV_SQL_NULL */
332 ulint* orig_len)/*!< out: original length of the locally
333 stored part of an externally stored column, or 0 */
335 *len = mach_read_compressed(ptr);
336 ptr += mach_get_compressed_size(*len);
338 *orig_len = 0;
340 switch (*len) {
341 case UNIV_SQL_NULL:
342 *field = NULL;
343 break;
344 case UNIV_EXTERN_STORAGE_FIELD:
345 *orig_len = mach_read_compressed(ptr);
346 ptr += mach_get_compressed_size(*orig_len);
347 *len = mach_read_compressed(ptr);
348 ptr += mach_get_compressed_size(*len);
349 *field = ptr;
350 ptr += *len;
352 ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
353 ut_ad(*len > *orig_len);
354 /* @see dtuple_convert_big_rec() */
355 ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE * 2);
356 /* we do not have access to index->table here
357 ut_ad(dict_table_get_format(index->table) >= DICT_TF_FORMAT_ZIP
358 || *len >= REC_MAX_INDEX_COL_LEN
359 + BTR_EXTERN_FIELD_REF_SIZE);
362 *len += UNIV_EXTERN_STORAGE_FIELD;
363 break;
364 default:
365 *field = ptr;
366 if (*len >= UNIV_EXTERN_STORAGE_FIELD) {
367 ptr += *len - UNIV_EXTERN_STORAGE_FIELD;
368 } else {
369 ptr += *len;
373 return(ptr);
376 /*******************************************************************//**
377 Builds a row reference from an undo log record.
378 @return pointer to remaining part of undo record */
379 UNIV_INTERN
380 byte*
381 trx_undo_rec_get_row_ref(
382 /*=====================*/
383 byte* ptr, /*!< in: remaining part of a copy of an undo log
384 record, at the start of the row reference;
385 NOTE that this copy of the undo log record must
386 be preserved as long as the row reference is
387 used, as we do NOT copy the data in the
388 record! */
389 dict_index_t* index, /*!< in: clustered index */
390 dtuple_t** ref, /*!< out, own: row reference */
391 mem_heap_t* heap) /*!< in: memory heap from which the memory
392 needed is allocated */
394 ulint ref_len;
395 ulint i;
397 ut_ad(index && ptr && ref && heap);
398 ut_a(dict_index_is_clust(index));
400 ref_len = dict_index_get_n_unique(index);
402 *ref = dtuple_create(heap, ref_len);
404 dict_index_copy_types(*ref, index, ref_len);
406 for (i = 0; i < ref_len; i++) {
407 dfield_t* dfield;
408 byte* field;
409 ulint len;
410 ulint orig_len;
412 dfield = dtuple_get_nth_field(*ref, i);
414 ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
416 dfield_set_data(dfield, field, len);
419 return(ptr);
422 /*******************************************************************//**
423 Skips a row reference from an undo log record.
424 @return pointer to remaining part of undo record */
425 UNIV_INTERN
426 byte*
427 trx_undo_rec_skip_row_ref(
428 /*======================*/
429 byte* ptr, /*!< in: remaining part in update undo log
430 record, at the start of the row reference */
431 dict_index_t* index) /*!< in: clustered index */
433 ulint ref_len;
434 ulint i;
436 ut_ad(index && ptr);
437 ut_a(dict_index_is_clust(index));
439 ref_len = dict_index_get_n_unique(index);
441 for (i = 0; i < ref_len; i++) {
442 byte* field;
443 ulint len;
444 ulint orig_len;
446 ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
449 return(ptr);
452 /**********************************************************************//**
453 Fetch a prefix of an externally stored column, for writing to the undo log
454 of an update or delete marking of a clustered index record.
455 @return ext_buf */
456 static
457 byte*
458 trx_undo_page_fetch_ext(
459 /*====================*/
460 byte* ext_buf, /*!< in: a buffer of
461 REC_MAX_INDEX_COL_LEN
462 + BTR_EXTERN_FIELD_REF_SIZE */
463 ulint zip_size, /*!< compressed page size in bytes,
464 or 0 for uncompressed BLOB */
465 const byte* field, /*!< in: an externally stored column */
466 ulint* len) /*!< in: length of field;
467 out: used length of ext_buf */
469 /* Fetch the BLOB. */
470 ulint ext_len = btr_copy_externally_stored_field_prefix(
471 ext_buf, REC_MAX_INDEX_COL_LEN, zip_size, field, *len);
472 /* BLOBs should always be nonempty. */
473 ut_a(ext_len);
474 /* Append the BLOB pointer to the prefix. */
475 memcpy(ext_buf + ext_len,
476 field + *len - BTR_EXTERN_FIELD_REF_SIZE,
477 BTR_EXTERN_FIELD_REF_SIZE);
478 *len = ext_len + BTR_EXTERN_FIELD_REF_SIZE;
479 return(ext_buf);
482 /**********************************************************************//**
483 Writes to the undo log a prefix of an externally stored column.
484 @return undo log position */
485 static
486 byte*
487 trx_undo_page_report_modify_ext(
488 /*============================*/
489 byte* ptr, /*!< in: undo log position,
490 at least 15 bytes must be available */
491 byte* ext_buf, /*!< in: a buffer of
492 REC_MAX_INDEX_COL_LEN
493 + BTR_EXTERN_FIELD_REF_SIZE,
494 or NULL when should not fetch
495 a longer prefix */
496 ulint zip_size, /*!< compressed page size in bytes,
497 or 0 for uncompressed BLOB */
498 const byte** field, /*!< in/out: the locally stored part of
499 the externally stored column */
500 ulint* len) /*!< in/out: length of field, in bytes */
502 if (ext_buf) {
503 /* If an ordering column is externally stored, we will
504 have to store a longer prefix of the field. In this
505 case, write to the log a marker followed by the
506 original length and the real length of the field. */
507 ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD);
509 ptr += mach_write_compressed(ptr, *len);
511 *field = trx_undo_page_fetch_ext(ext_buf, zip_size,
512 *field, len);
514 ptr += mach_write_compressed(ptr, *len);
515 } else {
516 ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD
517 + *len);
520 return(ptr);
523 /**********************************************************************//**
Reports in the undo log of an update or delete marking of a clustered index
record. The undo record is written at the page's current free offset and
consists, in this order, of: 2 bytes reserved for the offset of the next
record, the type byte combined with cmpl_info, trx->undo_no and the table
id, the info bits of rec, the DATA_TRX_ID and DATA_ROLL_PTR values of rec,
the first dict_index_get_n_unique(index) fields of rec, the old values of
the fields named in update (only when update != NULL), the old values of
all ord_part columns (for a delete marking, or when cmpl_info lacks
UPD_NODE_NO_ORD_CHANGE), and finally the 2-byte offset of the record start.
The page's free offset is moved and the addition is written to the mtr log
only after the whole record has fitted on the page. On failure the free
offset is not changed, but trx->update_undo->del_marks may already have
been set.
@return byte offset of the inserted undo log entry on the page if
succeed, 0 if fail */
528 static
529 ulint
530 trx_undo_page_report_modify(
531 /*========================*/
532 page_t* undo_page, /*!< in: undo log page */
533 trx_t* trx, /*!< in: transaction */
534 dict_index_t* index, /*!< in: clustered index where update or
535 delete marking is done */
536 const rec_t* rec, /*!< in: clustered index record which
537 has NOT yet been modified */
538 const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
539 const upd_t* update, /*!< in: update vector which tells the
540 columns to be updated; in the case of
541 a delete, this should be set to NULL */
542 ulint cmpl_info, /*!< in: compiler info on secondary
543 index updates */
544 mtr_t* mtr) /*!< in: mtr */
546 dict_table_t* table;
547 ulint first_free;
548 byte* ptr;
549 const byte* field;
550 ulint flen;
551 ulint col_no;
552 ulint type_cmpl;
553 byte* type_cmpl_ptr;
554 ulint i;
555 trx_id_t trx_id;
556 ibool ignore_prefix = FALSE;
/* Scratch buffer passed to trx_undo_page_report_modify_ext() when a longer
prefix of an externally stored column has to be fetched */
557 byte ext_buf[REC_MAX_INDEX_COL_LEN
558 + BTR_EXTERN_FIELD_REF_SIZE];
560 ut_a(dict_index_is_clust(index));
561 ut_ad(rec_offs_validate(rec, index, offsets));
562 ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
563 + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
564 table = index->table;
566 first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
567 + TRX_UNDO_PAGE_FREE);
568 ptr = undo_page + first_free;
570 ut_ad(first_free <= UNIV_PAGE_SIZE);
572 if (trx_undo_left(undo_page, ptr) < 50) {
574 /* NOTE: the value 50 must be big enough so that the general
575 fields written below fit on the undo log page */
577 return(0);
580 /* Reserve 2 bytes for the pointer to the next undo log record */
581 ptr += 2;
583 /* Store first some general parameters to the undo log */
585 if (!update) {
586 type_cmpl = TRX_UNDO_DEL_MARK_REC;
587 } else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
588 type_cmpl = TRX_UNDO_UPD_DEL_REC;
589 /* We are about to update a delete marked record.
590 We don't typically need the prefix in this case unless
591 the delete marking is done by the same transaction
592 (which we check below). */
593 ignore_prefix = TRUE;
594 } else {
595 type_cmpl = TRX_UNDO_UPD_EXIST_REC;
/* Combine the record type and cmpl_info into one byte; the reader splits
them again with TRX_UNDO_CMPL_INFO_MULT */
598 type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
/* Remember where the type byte is stored, so that TRX_UNDO_UPD_EXTERN can
be added to it later if an externally stored column is updated */
599 type_cmpl_ptr = ptr;
601 *ptr++ = (byte) type_cmpl;
602 ptr += mach_dulint_write_much_compressed(ptr, trx->undo_no);
604 ptr += mach_dulint_write_much_compressed(ptr, table->id);
606 /*----------------------------------------*/
607 /* Store the state of the info bits */
609 *ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table));
611 /* Store the values of the system columns */
612 field = rec_get_nth_field(rec, offsets,
613 dict_index_get_sys_col_pos(
614 index, DATA_TRX_ID), &flen);
615 ut_ad(flen == DATA_TRX_ID_LEN);
617 trx_id = trx_read_trx_id(field);
619 /* If it is an update of a delete marked record, then we are
620 allowed to ignore blob prefixes if the delete marking was done
621 by some other trx as it must have committed by now for us to
622 allow an over-write. */
623 if (ignore_prefix) {
624 ignore_prefix = ut_dulint_cmp(trx_id, trx->id) != 0;
626 ptr += mach_dulint_write_compressed(ptr, trx_id);
628 field = rec_get_nth_field(rec, offsets,
629 dict_index_get_sys_col_pos(
630 index, DATA_ROLL_PTR), &flen);
631 ut_ad(flen == DATA_ROLL_PTR_LEN);
633 ptr += mach_dulint_write_compressed(ptr, trx_read_roll_ptr(field));
635 /*----------------------------------------*/
636 /* Store then the fields required to uniquely determine the
637 record which will be modified in the clustered index */
639 for (i = 0; i < dict_index_get_n_unique(index); i++) {
641 field = rec_get_nth_field(rec, offsets, i, &flen);
643 /* The ordering columns must not be stored externally. */
644 ut_ad(!rec_offs_nth_extern(offsets, i));
645 ut_ad(dict_index_get_nth_col(index, i)->ord_part);
647 if (trx_undo_left(undo_page, ptr) < 5) {
649 return(0);
652 ptr += mach_write_compressed(ptr, flen);
654 if (flen != UNIV_SQL_NULL) {
655 if (trx_undo_left(undo_page, ptr) < flen) {
657 return(0);
660 ut_memcpy(ptr, field, flen);
661 ptr += flen;
665 /*----------------------------------------*/
666 /* Save to the undo log the old values of the columns to be updated. */
668 if (update) {
669 if (trx_undo_left(undo_page, ptr) < 5) {
671 return(0);
674 ptr += mach_write_compressed(ptr, upd_get_n_fields(update));
676 for (i = 0; i < upd_get_n_fields(update); i++) {
678 ulint pos = upd_get_nth_field(update, i)->field_no;
680 /* Write field number to undo log */
681 if (trx_undo_left(undo_page, ptr) < 5) {
683 return(0);
686 ptr += mach_write_compressed(ptr, pos);
688 /* Save the old value of field */
689 field = rec_get_nth_field(rec, offsets, pos, &flen);
/* trx_undo_page_report_modify_ext() requires at least 15 bytes to be
available at ptr */
691 if (trx_undo_left(undo_page, ptr) < 15) {
693 return(0);
696 if (rec_offs_nth_extern(offsets, pos)) {
697 ptr = trx_undo_page_report_modify_ext(
698 ptr,
699 dict_index_get_nth_col(index, pos)
700 ->ord_part
701 && !ignore_prefix
702 && flen < REC_MAX_INDEX_COL_LEN
703 ? ext_buf : NULL,
704 dict_table_zip_size(table),
705 &field, &flen);
707 /* Notify purge that it eventually has to
708 free the old externally stored field */
710 trx->update_undo->del_marks = TRUE;
712 *type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
713 } else {
714 ptr += mach_write_compressed(ptr, flen);
717 if (flen != UNIV_SQL_NULL) {
718 if (trx_undo_left(undo_page, ptr) < flen) {
720 return(0);
723 ut_memcpy(ptr, field, flen);
724 ptr += flen;
729 /*----------------------------------------*/
730 /* In the case of a delete marking, and also in the case of an update
731 where any ordering field of any index changes, store the values of all
732 columns which occur as ordering fields in any index. This info is used
733 in the purge of old versions where we use it to build and search the
734 delete marked index records, to look if we can remove them from the
735 index tree. Note that starting from 4.0.14 also externally stored
736 fields can be ordering in some index. Starting from 5.2, we no longer
737 store REC_MAX_INDEX_COL_LEN first bytes to the undo log record,
738 but we can construct the column prefix fields in the index by
739 fetching the first page of the BLOB that is pointed to by the
740 clustered index. This works also in crash recovery, because all pages
741 (including BLOBs) are recovered before anything is rolled back. */
743 if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
744 byte* old_ptr = ptr;
746 trx->update_undo->del_marks = TRUE;
748 if (trx_undo_left(undo_page, ptr) < 5) {
750 return(0);
753 /* Reserve 2 bytes to write the number of bytes the stored
754 fields take in this undo record */
756 ptr += 2;
758 for (col_no = 0; col_no < dict_table_get_n_cols(table);
759 col_no++) {
761 const dict_col_t* col
762 = dict_table_get_nth_col(table, col_no);
764 if (col->ord_part) {
765 ulint pos;
767 /* Write field number to undo log */
768 if (trx_undo_left(undo_page, ptr) < 5 + 15) {
770 return(0);
773 pos = dict_index_get_nth_col_pos(index,
774 col_no);
775 ptr += mach_write_compressed(ptr, pos);
777 /* Save the old value of field */
778 field = rec_get_nth_field(rec, offsets, pos,
779 &flen);
781 if (rec_offs_nth_extern(offsets, pos)) {
782 ptr = trx_undo_page_report_modify_ext(
783 ptr,
784 flen < REC_MAX_INDEX_COL_LEN
785 && !ignore_prefix
786 ? ext_buf : NULL,
787 dict_table_zip_size(table),
788 &field, &flen);
789 } else {
790 ptr += mach_write_compressed(
791 ptr, flen);
794 if (flen != UNIV_SQL_NULL) {
795 if (trx_undo_left(undo_page, ptr)
796 < flen) {
798 return(0);
801 ut_memcpy(ptr, field, flen);
802 ptr += flen;
/* Fill in the 2 bytes reserved above; the stored length is counted from
old_ptr and therefore includes those 2 bytes */
807 mach_write_to_2(old_ptr, ptr - old_ptr);
810 /*----------------------------------------*/
811 /* Write pointers to the previous and the next undo log records */
812 if (trx_undo_left(undo_page, ptr) < 2) {
814 return(0);
817 mach_write_to_2(ptr, first_free);
818 ptr += 2;
819 mach_write_to_2(undo_page + first_free, ptr - undo_page);
821 mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
822 ptr - undo_page);
824 /* Write to the REDO log about this change in the UNDO log */
826 trx_undof_page_add_undo_rec_log(undo_page, first_free,
827 ptr - undo_page, mtr);
828 return(first_free);
831 /**********************************************************************//**
832 Reads from an undo log update record the system field values of the old
833 version.
834 @return remaining part of undo log record after reading these values */
835 UNIV_INTERN
836 byte*
837 trx_undo_update_rec_get_sys_cols(
838 /*=============================*/
839 byte* ptr, /*!< in: remaining part of undo
840 log record after reading
841 general parameters */
842 trx_id_t* trx_id, /*!< out: trx id */
843 roll_ptr_t* roll_ptr, /*!< out: roll ptr */
844 ulint* info_bits) /*!< out: info bits state */
846 /* Read the state of the info bits */
847 *info_bits = mach_read_from_1(ptr);
848 ptr += 1;
850 /* Read the values of the system columns */
852 *trx_id = mach_dulint_read_compressed(ptr);
853 ptr += mach_dulint_get_compressed_size(*trx_id);
855 *roll_ptr = mach_dulint_read_compressed(ptr);
856 ptr += mach_dulint_get_compressed_size(*roll_ptr);
858 return(ptr);
861 /**********************************************************************//**
862 Reads from an update undo log record the number of updated fields.
863 @return remaining part of undo log record after reading this value */
864 UNIV_INLINE
865 byte*
866 trx_undo_update_rec_get_n_upd_fields(
867 /*=================================*/
868 byte* ptr, /*!< in: pointer to remaining part of undo log record */
869 ulint* n) /*!< out: number of fields */
871 *n = mach_read_compressed(ptr);
872 ptr += mach_get_compressed_size(*n);
874 return(ptr);
877 /**********************************************************************//**
878 Reads from an update undo log record a stored field number.
879 @return remaining part of undo log record after reading this value */
880 UNIV_INLINE
881 byte*
882 trx_undo_update_rec_get_field_no(
883 /*=============================*/
884 byte* ptr, /*!< in: pointer to remaining part of undo log record */
885 ulint* field_no)/*!< out: field number */
887 *field_no = mach_read_compressed(ptr);
888 ptr += mach_get_compressed_size(*field_no);
890 return(ptr);
893 /*******************************************************************//**
894 Builds an update vector based on a remaining part of an undo log record.
895 @return remaining part of the record, NULL if an error detected, which
896 means that the record is corrupted */
897 UNIV_INTERN
898 byte*
899 trx_undo_update_rec_get_update(
900 /*===========================*/
901 byte* ptr, /*!< in: remaining part in update undo log
902 record, after reading the row reference
903 NOTE that this copy of the undo log record must
904 be preserved as long as the update vector is
905 used, as we do NOT copy the data in the
906 record! */
907 dict_index_t* index, /*!< in: clustered index */
908 ulint type, /*!< in: TRX_UNDO_UPD_EXIST_REC,
909 TRX_UNDO_UPD_DEL_REC, or
910 TRX_UNDO_DEL_MARK_REC; in the last case,
911 only trx id and roll ptr fields are added to
912 the update vector */
913 trx_id_t trx_id, /*!< in: transaction id from this undo record */
914 roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */
915 ulint info_bits,/*!< in: info bits from this undo record */
916 trx_t* trx, /*!< in: transaction */
917 mem_heap_t* heap, /*!< in: memory heap from which the memory
918 needed is allocated */
919 upd_t** upd) /*!< out, own: update vector */
921 upd_field_t* upd_field;
922 upd_t* update;
923 ulint n_fields;
924 byte* buf;
925 ulint i;
927 ut_a(dict_index_is_clust(index));
929 if (type != TRX_UNDO_DEL_MARK_REC) {
930 ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields);
931 } else {
932 n_fields = 0;
935 update = upd_create(n_fields + 2, heap);
937 update->info_bits = info_bits;
939 /* Store first trx id and roll ptr to update vector */
941 upd_field = upd_get_nth_field(update, n_fields);
942 buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
943 trx_write_trx_id(buf, trx_id);
945 upd_field_set_field_no(upd_field,
946 dict_index_get_sys_col_pos(index, DATA_TRX_ID),
947 index, trx);
948 dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);
950 upd_field = upd_get_nth_field(update, n_fields + 1);
951 buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
952 trx_write_roll_ptr(buf, roll_ptr);
954 upd_field_set_field_no(
955 upd_field, dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
956 index, trx);
957 dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN);
959 /* Store then the updated ordinary columns to the update vector */
961 for (i = 0; i < n_fields; i++) {
963 byte* field;
964 ulint len;
965 ulint field_no;
966 ulint orig_len;
968 ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
970 if (field_no >= dict_index_get_n_fields(index)) {
971 fprintf(stderr,
972 "InnoDB: Error: trying to access"
973 " update undo rec field %lu in ",
974 (ulong) field_no);
975 dict_index_name_print(stderr, trx, index);
976 fprintf(stderr, "\n"
977 "InnoDB: but index has only %lu fields\n"
978 "InnoDB: Submit a detailed bug report"
979 " to http://bugs.mysql.com\n"
980 "InnoDB: Run also CHECK TABLE ",
981 (ulong) dict_index_get_n_fields(index));
982 ut_print_name(stderr, trx, TRUE, index->table_name);
983 fprintf(stderr, "\n"
984 "InnoDB: n_fields = %lu, i = %lu, ptr %p\n",
985 (ulong) n_fields, (ulong) i, ptr);
986 *upd = NULL;
987 return(NULL);
990 upd_field = upd_get_nth_field(update, i);
992 upd_field_set_field_no(upd_field, field_no, index, trx);
994 ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
996 upd_field->orig_len = orig_len;
998 if (len == UNIV_SQL_NULL) {
999 dfield_set_null(&upd_field->new_val);
1000 } else if (len < UNIV_EXTERN_STORAGE_FIELD) {
1001 dfield_set_data(&upd_field->new_val, field, len);
1002 } else {
1003 len -= UNIV_EXTERN_STORAGE_FIELD;
1005 dfield_set_data(&upd_field->new_val, field, len);
1006 dfield_set_ext(&upd_field->new_val);
1010 *upd = update;
1012 return(ptr);
1015 /*******************************************************************//**
1016 Builds a partial row from an update undo log record. It contains the
1017 columns which occur as ordering in any index of the table.
1018 @return pointer to remaining part of undo record */
1019 UNIV_INTERN
1020 byte*
1021 trx_undo_rec_get_partial_row(
1022 /*=========================*/
1023 byte* ptr, /*!< in: remaining part in update undo log
1024 record of a suitable type, at the start of
1025 the stored index columns;
1026 NOTE that this copy of the undo log record must
1027 be preserved as long as the partial row is
1028 used, as we do NOT copy the data in the
1029 record! */
1030 dict_index_t* index, /*!< in: clustered index */
1031 dtuple_t** row, /*!< out, own: partial row */
1032 ibool ignore_prefix, /*!< in: flag to indicate if we
1033 expect blob prefixes in undo. Used
1034 only in the assertion. */
1035 mem_heap_t* heap) /*!< in: memory heap from which the memory
1036 needed is allocated */
1038 const byte* end_ptr;
1039 ulint row_len;
1041 ut_ad(index);
1042 ut_ad(ptr);
1043 ut_ad(row);
1044 ut_ad(heap);
1045 ut_ad(dict_index_is_clust(index));
1047 row_len = dict_table_get_n_cols(index->table);
1049 *row = dtuple_create(heap, row_len);
1051 dict_table_copy_types(*row, index->table);
1053 end_ptr = ptr + mach_read_from_2(ptr);
1054 ptr += 2;
1056 while (ptr != end_ptr) {
1057 dfield_t* dfield;
1058 byte* field;
1059 ulint field_no;
1060 const dict_col_t* col;
1061 ulint col_no;
1062 ulint len;
1063 ulint orig_len;
1065 ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
1067 col = dict_index_get_nth_col(index, field_no);
1068 col_no = dict_col_get_no(col);
1070 ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
1072 dfield = dtuple_get_nth_field(*row, col_no);
1074 dfield_set_data(dfield, field, len);
1076 if (len != UNIV_SQL_NULL
1077 && len >= UNIV_EXTERN_STORAGE_FIELD) {
1078 dfield_set_len(dfield,
1079 len - UNIV_EXTERN_STORAGE_FIELD);
1080 dfield_set_ext(dfield);
1081 /* If the prefix of this column is indexed,
1082 ensure that enough prefix is stored in the
1083 undo log record. */
1084 if (!ignore_prefix && col->ord_part) {
1085 ut_a(dfield_get_len(dfield)
1086 >= 2 * BTR_EXTERN_FIELD_REF_SIZE);
1087 ut_a(dict_table_get_format(index->table)
1088 >= DICT_TF_FORMAT_ZIP
1089 || dfield_get_len(dfield)
1090 >= REC_MAX_INDEX_COL_LEN
1091 + BTR_EXTERN_FIELD_REF_SIZE);
1096 return(ptr);
1098 #endif /* !UNIV_HOTBACKUP */
1100 /***********************************************************************//**
1101 Erases the unused undo log page end.
1102 @return TRUE if the page contained something, FALSE if it was empty */
1103 static __attribute__((nonnull))
1104 ibool
1105 trx_undo_erase_page_end(
1106 /*====================*/
1107 page_t* undo_page, /*!< in/out: undo page whose end to erase */
1108 mtr_t* mtr) /*!< in/out: mini-transaction */
1110 ulint first_free;
1112 first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
1113 + TRX_UNDO_PAGE_FREE);
1114 memset(undo_page + first_free, 0xff,
1115 (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - first_free);
1117 mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
1118 return(first_free != TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
1121 /***********************************************************//**
1122 Parses a redo log record of erasing of an undo page end.
1123 @return end of log record or NULL */
1124 UNIV_INTERN
1125 byte*
1126 trx_undo_parse_erase_page_end(
1127 /*==========================*/
1128 byte* ptr, /*!< in: buffer */
1129 byte* end_ptr __attribute__((unused)), /*!< in: buffer end */
1130 page_t* page, /*!< in: page or NULL */
1131 mtr_t* mtr) /*!< in: mtr or NULL */
1133 ut_ad(ptr && end_ptr);
1135 if (page == NULL) {
1137 return(ptr);
1140 trx_undo_erase_page_end(page, mtr);
1142 return(ptr);
1145 #ifndef UNIV_HOTBACKUP
1146 /***********************************************************************//**
1147 Writes information to an undo log about an insert, update, or a delete marking
1148 of a clustered index record. This information is used in a rollback of the
1149 transaction and in consistent reads that must look to the history of this
1150 transaction.
1151 @return DB_SUCCESS or error code */
1152 UNIV_INTERN
1153 ulint
1154 trx_undo_report_row_operation(
1155 /*==========================*/
1156 ulint flags, /*!< in: if BTR_NO_UNDO_LOG_FLAG bit is
1157 set, does nothing */
1158 ulint op_type, /*!< in: TRX_UNDO_INSERT_OP or
1159 TRX_UNDO_MODIFY_OP */
1160 que_thr_t* thr, /*!< in: query thread */
1161 dict_index_t* index, /*!< in: clustered index */
1162 const dtuple_t* clust_entry, /*!< in: in the case of an insert,
1163 index entry to insert into the
1164 clustered index, otherwise NULL */
1165 const upd_t* update, /*!< in: in the case of an update,
1166 the update vector, otherwise NULL */
1167 ulint cmpl_info, /*!< in: compiler info on secondary
1168 index updates */
1169 const rec_t* rec, /*!< in: in case of an update or delete
1170 marking, the record in the clustered
1171 index, otherwise NULL */
1172 roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the
1173 inserted undo log record,
1174 ut_dulint_zero if BTR_NO_UNDO_LOG
1175 flag was specified */
1177 trx_t* trx;
1178 trx_undo_t* undo;
1179 ulint page_no;
1180 buf_block_t* undo_block;
1181 trx_rseg_t* rseg;
1182 mtr_t mtr;
1183 ulint err = DB_SUCCESS;
1184 mem_heap_t* heap = NULL;
1185 ulint offsets_[REC_OFFS_NORMAL_SIZE];
1186 ulint* offsets = offsets_;
1187 #ifdef UNIV_DEBUG
1188 int loop_count = 0;
1189 #endif /* UNIV_DEBUG */
1190 rec_offs_init(offsets_);
1192 ut_a(dict_index_is_clust(index));
1194 if (flags & BTR_NO_UNDO_LOG_FLAG) {
1196 *roll_ptr = ut_dulint_zero;
1198 return(DB_SUCCESS);
1201 ut_ad(thr);
1202 ut_ad((op_type != TRX_UNDO_INSERT_OP)
1203 || (clust_entry && !update && !rec));
1205 trx = thr_get_trx(thr);
1206 rseg = trx->rseg;
1208 mutex_enter(&(trx->undo_mutex));
1210 /* If the undo log is not assigned yet, assign one */
1212 if (op_type == TRX_UNDO_INSERT_OP) {
1214 if (trx->insert_undo == NULL) {
1216 err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
1219 undo = trx->insert_undo;
1221 if (UNIV_UNLIKELY(!undo)) {
1222 /* Did not succeed */
1223 ut_ad(err != DB_SUCCESS);
1224 mutex_exit(&(trx->undo_mutex));
1226 return(err);
1229 ut_ad(err == DB_SUCCESS);
1230 } else {
1231 ut_ad(op_type == TRX_UNDO_MODIFY_OP);
1233 if (trx->update_undo == NULL) {
1235 err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
1239 undo = trx->update_undo;
1241 if (UNIV_UNLIKELY(!undo)) {
1242 /* Did not succeed */
1243 ut_ad(err != DB_SUCCESS);
1244 mutex_exit(&(trx->undo_mutex));
1245 return(err);
1248 ut_ad(err == DB_SUCCESS);
1249 offsets = rec_get_offsets(rec, index, offsets,
1250 ULINT_UNDEFINED, &heap);
1253 mtr_start(&mtr);
1255 page_no = undo->last_page_no;
1256 undo_block = buf_page_get_gen(
1257 undo->space, undo->zip_size, page_no, RW_X_LATCH,
1258 undo->guess_block, BUF_GET, __FILE__, __LINE__, &mtr);
1259 buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE);
1261 do {
1262 page_t* undo_page;
1263 ulint offset;
1265 undo_page = buf_block_get_frame(undo_block);
1266 ut_ad(page_no == buf_block_get_page_no(undo_block));
1268 if (op_type == TRX_UNDO_INSERT_OP) {
1269 offset = trx_undo_page_report_insert(
1270 undo_page, trx, index, clust_entry, &mtr);
1271 } else {
1272 offset = trx_undo_page_report_modify(
1273 undo_page, trx, index, rec, offsets, update,
1274 cmpl_info, &mtr);
1277 if (UNIV_UNLIKELY(offset == 0)) {
1278 /* The record did not fit on the page. We erase the
1279 end segment of the undo log page and write a log
1280 record of it: this is to ensure that in the debug
1281 version the replicate page constructed using the log
1282 records stays identical to the original page */
1284 if (!trx_undo_erase_page_end(undo_page, &mtr)) {
1285 /* The record did not fit on an empty
1286 undo page. Discard the freshly allocated
1287 page and return an error. */
1289 /* When we remove a page from an undo
1290 log, this is analogous to a
1291 pessimistic insert in a B-tree, and we
1292 must reserve the counterpart of the
1293 tree latch, which is the rseg
1294 mutex. We must commit the mini-transaction
1295 first, because it may be holding lower-level
1296 latches, such as SYNC_FSP and SYNC_FSP_PAGE. */
1298 mtr_commit(&mtr);
1299 mtr_start(&mtr);
1301 mutex_enter(&rseg->mutex);
1302 trx_undo_free_last_page(trx, undo, &mtr);
1303 mutex_exit(&rseg->mutex);
1305 err = DB_TOO_BIG_RECORD;
1306 goto err_exit;
1309 mtr_commit(&mtr);
1310 } else {
1311 /* Success */
1313 mtr_commit(&mtr);
1315 undo->empty = FALSE;
1316 undo->top_page_no = page_no;
1317 undo->top_offset = offset;
1318 undo->top_undo_no = trx->undo_no;
1319 undo->guess_block = undo_block;
1321 UT_DULINT_INC(trx->undo_no);
1323 mutex_exit(&trx->undo_mutex);
1325 *roll_ptr = trx_undo_build_roll_ptr(
1326 op_type == TRX_UNDO_INSERT_OP,
1327 rseg->id, page_no, offset);
1328 err = DB_SUCCESS;
1329 goto func_exit;
1332 ut_ad(page_no == undo->last_page_no);
1334 /* We have to extend the undo log by one page */
1336 ut_ad(++loop_count < 2);
1337 mtr_start(&mtr);
1339 /* When we add a page to an undo log, this is analogous to
1340 a pessimistic insert in a B-tree, and we must reserve the
1341 counterpart of the tree latch, which is the rseg mutex. */
1343 mutex_enter(&rseg->mutex);
1344 undo_block = trx_undo_add_page(trx, undo, &mtr);
1345 mutex_exit(&rseg->mutex);
1347 page_no = undo->last_page_no;
1348 } while (undo_block != NULL);
1350 /* Did not succeed: out of space */
1351 err = DB_OUT_OF_FILE_SPACE;
1353 err_exit:
1354 mutex_exit(&trx->undo_mutex);
1355 mtr_commit(&mtr);
1356 func_exit:
1357 if (UNIV_LIKELY_NULL(heap)) {
1358 mem_heap_free(heap);
1360 return(err);
1363 /*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
1365 /******************************************************************//**
1366 Copies an undo record to heap. This function can be called if we know that
1367 the undo log record exists.
1368 @return own: copy of the record */
1369 UNIV_INTERN
1370 trx_undo_rec_t*
1371 trx_undo_get_undo_rec_low(
1372 /*======================*/
1373 roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
1374 mem_heap_t* heap) /*!< in: memory heap where copied */
1376 trx_undo_rec_t* undo_rec;
1377 ulint rseg_id;
1378 ulint page_no;
1379 ulint offset;
1380 const page_t* undo_page;
1381 trx_rseg_t* rseg;
1382 ibool is_insert;
1383 mtr_t mtr;
1385 trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no,
1386 &offset);
1387 rseg = trx_rseg_get_on_id(rseg_id);
1389 mtr_start(&mtr);
1391 undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
1392 page_no, &mtr);
1394 undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
1396 mtr_commit(&mtr);
1398 return(undo_rec);
1401 /******************************************************************//**
1402 Copies an undo record to heap.
1404 NOTE: the caller must have latches on the clustered index page and
1405 purge_view.
1407 @return DB_SUCCESS, or DB_MISSING_HISTORY if the undo log has been
1408 truncated and we cannot fetch the old version */
1409 UNIV_INTERN
1410 ulint
1411 trx_undo_get_undo_rec(
1412 /*==================*/
1413 roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
1414 trx_id_t trx_id, /*!< in: id of the trx that generated
1415 the roll pointer: it points to an
1416 undo log of this transaction */
1417 trx_undo_rec_t** undo_rec, /*!< out, own: copy of the record */
1418 mem_heap_t* heap) /*!< in: memory heap where copied */
1420 #ifdef UNIV_SYNC_DEBUG
1421 ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
1422 #endif /* UNIV_SYNC_DEBUG */
1424 if (!trx_purge_update_undo_must_exist(trx_id)) {
1426 /* It may be that the necessary undo log has already been
1427 deleted */
1429 return(DB_MISSING_HISTORY);
1432 *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
1434 return(DB_SUCCESS);
1437 /*******************************************************************//**
1438 Build a previous version of a clustered index record. This function checks
1439 that the caller has a latch on the index page of the clustered index record
1440 and an s-latch on the purge_view. This guarantees that the stack of versions
1441 is locked all the way down to the purge_view.
1442 @return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is
1443 earlier than purge_view, which means that it may have been removed,
1444 DB_ERROR if corrupted record */
1445 UNIV_INTERN
1446 ulint
1447 trx_undo_prev_version_build(
1448 /*========================*/
1449 const rec_t* index_rec,/*!< in: clustered index record in the
1450 index tree */
1451 mtr_t* index_mtr __attribute__((unused)),
1452 /*!< in: mtr which contains the latch to
1453 index_rec page and purge_view */
1454 const rec_t* rec, /*!< in: version of a clustered index record */
1455 dict_index_t* index, /*!< in: clustered index */
1456 ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
1457 mem_heap_t* heap, /*!< in: memory heap from which the memory
1458 needed is allocated */
1459 rec_t** old_vers)/*!< out, own: previous version, or NULL if
1460 rec is the first inserted version, or if
1461 history data has been deleted (an error),
1462 or if the purge COULD have removed the version
1463 though it has not yet done so */
1465 trx_undo_rec_t* undo_rec = NULL;
1466 dtuple_t* entry;
1467 trx_id_t rec_trx_id;
1468 ulint type;
1469 undo_no_t undo_no;
1470 dulint table_id;
1471 trx_id_t trx_id;
1472 roll_ptr_t roll_ptr;
1473 roll_ptr_t old_roll_ptr;
1474 upd_t* update;
1475 byte* ptr;
1476 ulint info_bits;
1477 ulint cmpl_info;
1478 ibool dummy_extern;
1479 byte* buf;
1480 ulint err;
1481 #ifdef UNIV_SYNC_DEBUG
1482 ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
1483 #endif /* UNIV_SYNC_DEBUG */
1484 ut_ad(mtr_memo_contains_page(index_mtr, index_rec, MTR_MEMO_PAGE_S_FIX)
1485 || mtr_memo_contains_page(index_mtr, index_rec,
1486 MTR_MEMO_PAGE_X_FIX));
1487 ut_ad(rec_offs_validate(rec, index, offsets));
1489 if (!dict_index_is_clust(index)) {
1490 fprintf(stderr, "InnoDB: Error: trying to access"
1491 " update undo rec for non-clustered index %s\n"
1492 "InnoDB: Submit a detailed bug report to"
1493 " http://bugs.mysql.com\n"
1494 "InnoDB: index record ", index->name);
1495 rec_print(stderr, index_rec, index);
1496 fputs("\n"
1497 "InnoDB: record version ", stderr);
1498 rec_print_new(stderr, rec, offsets);
1499 putc('\n', stderr);
1500 return(DB_ERROR);
1503 roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
1504 old_roll_ptr = roll_ptr;
1506 *old_vers = NULL;
1508 if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
1510 /* The record rec is the first inserted version */
1512 return(DB_SUCCESS);
1515 rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
1517 err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap);
1519 if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
1520 /* The undo record may already have been purged.
1521 This should never happen in InnoDB. */
1523 return(err);
1526 ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
1527 &dummy_extern, &undo_no, &table_id);
1529 ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
1530 &info_bits);
1532 /* (a) If a clustered index record version is such that the
1533 trx id stamp in it is bigger than purge_sys->view, then the
1534 BLOBs in that version are known to exist (the purge has not
1535 progressed that far);
1537 (b) if the version is the first version such that trx id in it
1538 is less than purge_sys->view, and it is not delete-marked,
1539 then the BLOBs in that version are known to exist (the purge
1540 cannot have purged the BLOBs referenced by that version
1541 yet).
1543 This function does not fetch any BLOBs. The callers might, by
1544 possibly invoking row_ext_create() via row_build(). However,
1545 they should have all needed information in the *old_vers
1546 returned by this function. This is because *old_vers is based
1547 on the transaction undo log records. The function
1548 trx_undo_page_fetch_ext() will write BLOB prefixes to the
1549 transaction undo log that are at least as long as the longest
1550 possible column prefix in a secondary index. Thus, secondary
1551 index entries for *old_vers can be constructed without
1552 dereferencing any BLOB pointers. */
1554 ptr = trx_undo_rec_skip_row_ref(ptr, index);
1556 ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
1557 roll_ptr, info_bits,
1558 NULL, heap, &update);
1560 if (ut_dulint_cmp(table_id, index->table->id) != 0) {
1561 ptr = NULL;
1563 fprintf(stderr,
1564 "InnoDB: Error: trying to access update undo rec"
1565 " for table %s\n"
1566 "InnoDB: but the table id in the"
1567 " undo record is wrong\n"
1568 "InnoDB: Submit a detailed bug report"
1569 " to http://bugs.mysql.com\n"
1570 "InnoDB: Run also CHECK TABLE %s\n",
1571 index->table_name, index->table_name);
1574 if (ptr == NULL) {
1575 /* The record was corrupted, return an error; these printfs
1576 should catch an elusive bug in row_vers_old_has_index_entry */
1578 fprintf(stderr,
1579 "InnoDB: table %s, index %s, n_uniq %lu\n"
1580 "InnoDB: undo rec address %p, type %lu cmpl_info %lu\n"
1581 "InnoDB: undo rec table id %lu %lu,"
1582 " index table id %lu %lu\n"
1583 "InnoDB: dump of 150 bytes in undo rec: ",
1584 index->table_name, index->name,
1585 (ulong) dict_index_get_n_unique(index),
1586 undo_rec, (ulong) type, (ulong) cmpl_info,
1587 (ulong) ut_dulint_get_high(table_id),
1588 (ulong) ut_dulint_get_low(table_id),
1589 (ulong) ut_dulint_get_high(index->table->id),
1590 (ulong) ut_dulint_get_low(index->table->id));
1591 ut_print_buf(stderr, undo_rec, 150);
1592 fputs("\n"
1593 "InnoDB: index record ", stderr);
1594 rec_print(stderr, index_rec, index);
1595 fputs("\n"
1596 "InnoDB: record version ", stderr);
1597 rec_print_new(stderr, rec, offsets);
1598 fprintf(stderr, "\n"
1599 "InnoDB: Record trx id " TRX_ID_FMT
1600 ", update rec trx id " TRX_ID_FMT "\n"
1601 "InnoDB: Roll ptr in rec %lu %lu, in update rec"
1602 " %lu %lu\n",
1603 TRX_ID_PREP_PRINTF(rec_trx_id),
1604 TRX_ID_PREP_PRINTF(trx_id),
1605 (ulong) ut_dulint_get_high(old_roll_ptr),
1606 (ulong) ut_dulint_get_low(old_roll_ptr),
1607 (ulong) ut_dulint_get_high(roll_ptr),
1608 (ulong) ut_dulint_get_low(roll_ptr));
1610 trx_purge_sys_print();
1611 return(DB_ERROR);
1614 # if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
1615 ut_a(!rec_offs_any_null_extern(rec, offsets));
1616 # endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
1618 if (row_upd_changes_field_size_or_external(index, offsets, update)) {
1619 ulint n_ext;
1621 /* We should confirm the existence of disowned external data,
1622 if the previous version record is delete marked. If the trx_id
1623 of the previous record is seen by purge view, we should treat
1624 it as missing history, because the disowned external data
1625 might be purged already.
1627 The inherited external data (BLOBs) can be freed (purged)
1628 after trx_id was committed, provided that no view was started
1629 before trx_id. If the purge view can see the committed
1630 delete-marked record by trx_id, no transactions need to access
1631 the BLOB. */
1633 if ((update->info_bits & REC_INFO_DELETED_FLAG)
1634 && read_view_sees_trx_id(purge_sys->view, trx_id)) {
1635 /* treat as a fresh insert, not to
1636 cause assertion error at the caller. */
1637 return(DB_SUCCESS);
1640 /* We have to set the appropriate extern storage bits in the
1641 old version of the record: the extern bits in rec for those
1642 fields that update does NOT update, as well as the bits for
1643 those fields that update updates to become externally stored
1644 fields. Store the info: */
1646 entry = row_rec_to_index_entry(ROW_COPY_DATA, rec, index,
1647 offsets, &n_ext, heap);
1648 n_ext += btr_push_update_extern_fields(entry, update, heap);
1649 /* The page containing the clustered index record
1650 corresponding to entry is latched in mtr. Thus the
1651 following call is safe. */
1652 row_upd_index_replace_new_col_vals(entry, index, update, heap);
1654 buf = mem_heap_alloc(heap, rec_get_converted_size(index, entry,
1655 n_ext));
1657 *old_vers = rec_convert_dtuple_to_rec(buf, index,
1658 entry, n_ext);
1659 } else {
1660 buf = mem_heap_alloc(heap, rec_offs_size(offsets));
1661 *old_vers = rec_copy(buf, rec, offsets);
1662 rec_offs_make_valid(*old_vers, index, offsets);
1663 row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
1666 return(DB_SUCCESS);
1668 #endif /* !UNIV_HOTBACKUP */