From 32779d396884cba6bb1c68a14db14e34f7280612 Mon Sep 17 00:00:00 2001 From: tgl Date: Sat, 6 Jun 2009 02:39:40 +0000 Subject: [PATCH] Fix a serious bug introduced into GIN in 8.4: now that MergeItemPointers() is supposed to remove duplicate heap TIDs, we have to be sure to reduce the tuple size and posting-item count accordingly in addItemPointersToTuple(). Failing to do so resulted in the effective injection of garbage TIDs into the index contents, ie, whatever happened to be in the memory palloc'd for the new tuple. I'm not sure that this fully explains the index corruption reported by Tatsuo Ishii, but the test case I'm using no longer fails. --- src/backend/access/gin/gindatapage.c | 12 ++++++--- src/backend/access/gin/ginentrypage.c | 51 ++++++++++++++++++++++++++--------- src/backend/access/gin/gininsert.c | 14 +++++----- src/include/access/gin.h | 3 ++- 4 files changed, 58 insertions(+), 22 deletions(-) diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c index ab7be28c45..6ca7bffde2 100644 --- a/src/backend/access/gin/gindatapage.c +++ b/src/backend/access/gin/gindatapage.c @@ -32,10 +32,14 @@ compareItemPointers(ItemPointer a, ItemPointer b) } /* - * Merge two ordered array of itempointer + * Merge two ordered arrays of itempointers, eliminating any duplicates. + * Returns the number of items in the result. + * Caller is responsible that there is enough space at *dst. */ -void -MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb) +uint32 +MergeItemPointers(ItemPointerData *dst, + ItemPointerData *a, uint32 na, + ItemPointerData *b, uint32 nb) { ItemPointerData *dptr = dst; ItemPointerData *aptr = a, @@ -62,6 +66,8 @@ MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPoint while (bptr - b < nb) *dptr++ = *bptr++; + + return dptr - dst; } /* diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c index 8294ae7b56..c54dd419f6 100644 --- a/src/backend/access/gin/ginentrypage.c +++ b/src/backend/access/gin/ginentrypage.c @@ -20,28 +20,33 @@ #include "utils/rel.h" /* - * forms tuple for entry tree. On leaf page, Index tuple has - * non-traditional layout. Tuple may contain posting list or - * root blocknumber of posting tree. Macros GinIsPostingTre: (itup) / GinSetPostingTree(itup, blkno) + * Form a tuple for entry tree. + * + * On leaf pages, Index tuple has non-traditional layout. Tuple may contain + * posting list or root blocknumber of posting tree. + * Macros: GinIsPostingTree(itup) / GinSetPostingTree(itup, blkno) * 1) Posting list - * - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual + * - itup->t_info & INDEX_SIZE_MASK contains total size of tuple as usual * - ItemPointerGetBlockNumber(&itup->t_tid) contains original * size of tuple (without posting list). - * Macroses: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n) + * Macros: GinGetOrigSizePosting(itup) / GinSetOrigSizePosting(itup,n) * - ItemPointerGetOffsetNumber(&itup->t_tid) contains number - * of elements in posting list (number of heap itempointer) - * Macroses: GinGetNPosting(itup) / GinSetNPosting(itup,n) - * - After usual part of tuple there is a posting list + * of elements in posting list (number of heap itempointers) + * Macros: GinGetNPosting(itup) / GinSetNPosting(itup,n) + * - After standard part of tuple there is a posting list, ie, array + * of heap itempointers * Macros: GinGetPosting(itup) * 2) Posting tree * - itup->t_info & INDEX_SIZE_MASK contains size of tuple as usual * - ItemPointerGetBlockNumber(&itup->t_tid) contains block number of * root of posting tree - * - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number GIN_TREE_POSTING + * - ItemPointerGetOffsetNumber(&itup->t_tid) contains magic number + * GIN_TREE_POSTING, which distinguishes this from posting-list case * - * Storage of attributes of tuple are different for single and multicolumn index. - * For single-column index tuple stores only value to be indexed and for - * multicolumn variant it stores two attributes: column number of value and value. + * Attributes of an index tuple are different for single and multicolumn index. + * For single-column case, index tuple stores only value to be indexed. + * For multicolumn case, it stores two attributes: column number of value + * and value. */ IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, ItemPointerData *ipd, uint32 nipd) @@ -90,6 +95,28 @@ GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, ItemPointerData } /* + * Sometimes we reduce the number of posting list items in a tuple after + * having built it with GinFormTuple. This function adjusts the size + * fields to match. + */ +void +GinShortenTuple(IndexTuple itup, uint32 nipd) +{ + uint32 newsize; + + Assert(nipd <= GinGetNPosting(itup)); + + newsize = MAXALIGN(SHORTALIGN(GinGetOrigSizePosting(itup)) + sizeof(ItemPointerData) * nipd); + + Assert(newsize <= (itup->t_info & INDEX_SIZE_MASK)); + + itup->t_info &= ~INDEX_SIZE_MASK; + itup->t_info |= newsize; + + GinSetNPosting(itup, nipd); +} + +/* * Entry tree is a "static", ie tuple never deletes from it, * so we don't use right bound, we use rightest key instead. */ diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 0190b2508f..70bb0e351e 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -102,17 +102,19 @@ addItemPointersToTuple(Relation index, GinState *ginstate, GinBtreeStack *stack, { Datum key = gin_index_getattr(ginstate, old); OffsetNumber attnum = gintuple_get_attrnum(ginstate, old); - IndexTuple res = GinFormTuple(ginstate, attnum, key, NULL, nitem + GinGetNPosting(old)); + IndexTuple res = GinFormTuple(ginstate, attnum, key, + NULL, nitem + GinGetNPosting(old)); if (res) { /* good, small enough */ - MergeItemPointers(GinGetPosting(res), - GinGetPosting(old), GinGetNPosting(old), - items, nitem - ); + uint32 newnitem; - GinSetNPosting(res, nitem + GinGetNPosting(old)); + newnitem = MergeItemPointers(GinGetPosting(res), + GinGetPosting(old), GinGetNPosting(old), + items, nitem); + /* merge might have eliminated some duplicate items */ + GinShortenTuple(res, newnitem); } else { diff --git a/src/include/access/gin.h b/src/include/access/gin.h index 44510a36a5..f3ac36e859 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -435,6 +435,7 @@ extern void findParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBl /* ginentrypage.c */ extern IndexTuple GinFormTuple(GinState *ginstate, OffsetNumber attnum, Datum key, ItemPointerData *ipd, uint32 nipd); +extern void GinShortenTuple(IndexTuple itup, uint32 nipd); extern void prepareEntryScan(GinBtree btree, Relation index, OffsetNumber attnum, Datum value, GinState *ginstate); extern void entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf); @@ -442,7 +443,7 @@ extern IndexTuple ginPageGetLinkItup(Buffer buf); /* gindatapage.c */ extern int compareItemPointers(ItemPointer a, ItemPointer b); -extern void MergeItemPointers(ItemPointerData *dst, +extern uint32 MergeItemPointers(ItemPointerData *dst, ItemPointerData *a, uint32 na, ItemPointerData *b, uint32 nb); -- 2.11.4.GIT