Guard against zero vardata.rel->tuples in estimate_hash_bucketsize().
[pgsql.git] / contrib / pg_trgm / trgm_gist.c
blob2181c2623fad4159981743dd5dd3164d35066506
1 /*
2 * contrib/pg_trgm/trgm_gist.c
3 */
4 #include "postgres.h"
6 #include "trgm.h"
8 #include "access/stratnum.h"
9 #include "fmgr.h"
12 typedef struct
14 /* most recent inputs to gtrgm_consistent */
15 StrategyNumber strategy;
16 text *query;
17 /* extracted trigrams for query */
18 TRGM *trigrams;
19 /* if a regex operator, the extracted graph */
20 TrgmPackedGraph *graph;
23 * The "query" and "trigrams" are stored in the same palloc block as this
24 * cache struct, at MAXALIGN'ed offsets. The graph however isn't.
26 } gtrgm_consistent_cache;
28 #define GETENTRY(vec,pos) ((TRGM *) DatumGetPointer((vec)->vector[(pos)].key))
31 PG_FUNCTION_INFO_V1(gtrgm_in);
32 PG_FUNCTION_INFO_V1(gtrgm_out);
33 PG_FUNCTION_INFO_V1(gtrgm_compress);
34 PG_FUNCTION_INFO_V1(gtrgm_decompress);
35 PG_FUNCTION_INFO_V1(gtrgm_consistent);
36 PG_FUNCTION_INFO_V1(gtrgm_distance);
37 PG_FUNCTION_INFO_V1(gtrgm_union);
38 PG_FUNCTION_INFO_V1(gtrgm_same);
39 PG_FUNCTION_INFO_V1(gtrgm_penalty);
40 PG_FUNCTION_INFO_V1(gtrgm_picksplit);
42 /* Number of one-bits in an unsigned byte */
43 static const uint8 number_of_ones[256] = {
44 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
45 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
46 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
47 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
48 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
49 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
50 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
51 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
52 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
53 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
54 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
55 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
56 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
57 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
58 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
59 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
63 Datum
64 gtrgm_in(PG_FUNCTION_ARGS)
66 elog(ERROR, "not implemented");
67 PG_RETURN_DATUM(0);
70 Datum
71 gtrgm_out(PG_FUNCTION_ARGS)
73 elog(ERROR, "not implemented");
74 PG_RETURN_DATUM(0);
77 static void
78 makesign(BITVECP sign, TRGM *a)
80 int32 k,
81 len = ARRNELEM(a);
82 trgm *ptr = GETARR(a);
83 int32 tmp = 0;
85 MemSet((void *) sign, 0, sizeof(BITVEC));
86 SETBIT(sign, SIGLENBIT); /* set last unused bit */
87 for (k = 0; k < len; k++)
89 CPTRGM(((char *) &tmp), ptr + k);
90 HASH(sign, tmp);
94 Datum
95 gtrgm_compress(PG_FUNCTION_ARGS)
97 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
98 GISTENTRY *retval = entry;
100 if (entry->leafkey)
101 { /* trgm */
102 TRGM *res;
103 text *val = DatumGetTextP(entry->key);
105 res = generate_trgm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
106 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
107 gistentryinit(*retval, PointerGetDatum(res),
108 entry->rel, entry->page,
109 entry->offset, FALSE);
111 else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
112 !ISALLTRUE(DatumGetPointer(entry->key)))
114 int32 i,
115 len;
116 TRGM *res;
117 BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
119 LOOPBYTE
121 if ((sign[i] & 0xff) != 0xff)
122 PG_RETURN_POINTER(retval);
125 len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
126 res = (TRGM *) palloc(len);
127 SET_VARSIZE(res, len);
128 res->flag = SIGNKEY | ALLISTRUE;
130 retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
131 gistentryinit(*retval, PointerGetDatum(res),
132 entry->rel, entry->page,
133 entry->offset, FALSE);
135 PG_RETURN_POINTER(retval);
138 Datum
139 gtrgm_decompress(PG_FUNCTION_ARGS)
141 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
142 GISTENTRY *retval;
143 text *key;
145 key = DatumGetTextP(entry->key);
147 if (key != (text *) DatumGetPointer(entry->key))
149 /* need to pass back the decompressed item */
150 retval = palloc(sizeof(GISTENTRY));
151 gistentryinit(*retval, PointerGetDatum(key),
152 entry->rel, entry->page, entry->offset, entry->leafkey);
153 PG_RETURN_POINTER(retval);
155 else
157 /* we can return the entry as-is */
158 PG_RETURN_POINTER(entry);
162 static int32
163 cnt_sml_sign_common(TRGM *qtrg, BITVECP sign)
165 int32 count = 0;
166 int32 k,
167 len = ARRNELEM(qtrg);
168 trgm *ptr = GETARR(qtrg);
169 int32 tmp = 0;
171 for (k = 0; k < len; k++)
173 CPTRGM(((char *) &tmp), ptr + k);
174 count += GETBIT(sign, HASHVAL(tmp));
177 return count;
180 Datum
181 gtrgm_consistent(PG_FUNCTION_ARGS)
183 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
184 text *query = PG_GETARG_TEXT_P(1);
185 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
187 /* Oid subtype = PG_GETARG_OID(3); */
188 bool *recheck = (bool *) PG_GETARG_POINTER(4);
189 TRGM *key = (TRGM *) DatumGetPointer(entry->key);
190 TRGM *qtrg;
191 bool res;
192 Size querysize = VARSIZE(query);
193 gtrgm_consistent_cache *cache;
194 double nlimit;
197 * We keep the extracted trigrams in cache, because trigram extraction is
198 * relatively CPU-expensive. When trying to reuse a cached value, check
199 * strategy number not just query itself, because trigram extraction
200 * depends on strategy.
202 * The cached structure is a single palloc chunk containing the
203 * gtrgm_consistent_cache header, then the input query (starting at a
204 * MAXALIGN boundary), then the TRGM value (also starting at a MAXALIGN
205 * boundary). However we don't try to include the regex graph (if any) in
206 * that struct. (XXX currently, this approach can leak regex graphs
207 * across index rescans. Not clear if that's worth fixing.)
209 cache = (gtrgm_consistent_cache *) fcinfo->flinfo->fn_extra;
210 if (cache == NULL ||
211 cache->strategy != strategy ||
212 VARSIZE(cache->query) != querysize ||
213 memcmp((char *) cache->query, (char *) query, querysize) != 0)
215 gtrgm_consistent_cache *newcache;
216 TrgmPackedGraph *graph = NULL;
217 Size qtrgsize;
219 switch (strategy)
221 case SimilarityStrategyNumber:
222 case WordSimilarityStrategyNumber:
223 qtrg = generate_trgm(VARDATA(query),
224 querysize - VARHDRSZ);
225 break;
226 case ILikeStrategyNumber:
227 #ifndef IGNORECASE
228 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
229 #endif
230 /* FALL THRU */
231 case LikeStrategyNumber:
232 qtrg = generate_wildcard_trgm(VARDATA(query),
233 querysize - VARHDRSZ);
234 break;
235 case RegExpICaseStrategyNumber:
236 #ifndef IGNORECASE
237 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
238 #endif
239 /* FALL THRU */
240 case RegExpStrategyNumber:
241 qtrg = createTrgmNFA(query, PG_GET_COLLATION(),
242 &graph, fcinfo->flinfo->fn_mcxt);
243 /* just in case an empty array is returned ... */
244 if (qtrg && ARRNELEM(qtrg) <= 0)
246 pfree(qtrg);
247 qtrg = NULL;
249 break;
250 default:
251 elog(ERROR, "unrecognized strategy number: %d", strategy);
252 qtrg = NULL; /* keep compiler quiet */
253 break;
256 qtrgsize = qtrg ? VARSIZE(qtrg) : 0;
258 newcache = (gtrgm_consistent_cache *)
259 MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
260 MAXALIGN(sizeof(gtrgm_consistent_cache)) +
261 MAXALIGN(querysize) +
262 qtrgsize);
264 newcache->strategy = strategy;
265 newcache->query = (text *)
266 ((char *) newcache + MAXALIGN(sizeof(gtrgm_consistent_cache)));
267 memcpy((char *) newcache->query, (char *) query, querysize);
268 if (qtrg)
270 newcache->trigrams = (TRGM *)
271 ((char *) newcache->query + MAXALIGN(querysize));
272 memcpy((char *) newcache->trigrams, (char *) qtrg, qtrgsize);
273 /* release qtrg in case it was made in fn_mcxt */
274 pfree(qtrg);
276 else
277 newcache->trigrams = NULL;
278 newcache->graph = graph;
280 if (cache)
281 pfree(cache);
282 fcinfo->flinfo->fn_extra = (void *) newcache;
283 cache = newcache;
286 qtrg = cache->trigrams;
288 switch (strategy)
290 case SimilarityStrategyNumber:
291 case WordSimilarityStrategyNumber:
292 /* Similarity search is exact. Word similarity search is inexact */
293 *recheck = (strategy == WordSimilarityStrategyNumber);
294 nlimit = (strategy == SimilarityStrategyNumber) ?
295 similarity_threshold : word_similarity_threshold;
297 if (GIST_LEAF(entry))
298 { /* all leafs contains orig trgm */
300 * Prevent gcc optimizing the tmpsml variable using volatile
301 * keyword. Otherwise comparison of nlimit and tmpsml may give
302 * wrong results.
304 float4 volatile tmpsml = cnt_sml(qtrg, key, *recheck);
306 /* strange bug at freebsd 5.2.1 and gcc 3.3.3 */
307 res = (*(int *) &tmpsml == *(int *) &nlimit || tmpsml > nlimit);
309 else if (ISALLTRUE(key))
310 { /* non-leaf contains signature */
311 res = true;
313 else
314 { /* non-leaf contains signature */
315 int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
316 int32 len = ARRNELEM(qtrg);
318 if (len == 0)
319 res = false;
320 else
321 res = (((((float8) count) / ((float8) len))) >= nlimit);
323 break;
324 case ILikeStrategyNumber:
325 #ifndef IGNORECASE
326 elog(ERROR, "cannot handle ~~* with case-sensitive trigrams");
327 #endif
328 /* FALL THRU */
329 case LikeStrategyNumber:
330 /* Wildcard search is inexact */
331 *recheck = true;
334 * Check if all the extracted trigrams can be present in child
335 * nodes.
337 if (GIST_LEAF(entry))
338 { /* all leafs contains orig trgm */
339 res = trgm_contained_by(qtrg, key);
341 else if (ISALLTRUE(key))
342 { /* non-leaf contains signature */
343 res = true;
345 else
346 { /* non-leaf contains signature */
347 int32 k,
348 tmp = 0,
349 len = ARRNELEM(qtrg);
350 trgm *ptr = GETARR(qtrg);
351 BITVECP sign = GETSIGN(key);
353 res = true;
354 for (k = 0; k < len; k++)
356 CPTRGM(((char *) &tmp), ptr + k);
357 if (!GETBIT(sign, HASHVAL(tmp)))
359 res = false;
360 break;
364 break;
365 case RegExpICaseStrategyNumber:
366 #ifndef IGNORECASE
367 elog(ERROR, "cannot handle ~* with case-sensitive trigrams");
368 #endif
369 /* FALL THRU */
370 case RegExpStrategyNumber:
371 /* Regexp search is inexact */
372 *recheck = true;
374 /* Check regex match as much as we can with available info */
375 if (qtrg)
377 if (GIST_LEAF(entry))
378 { /* all leafs contains orig trgm */
379 bool *check;
381 check = trgm_presence_map(qtrg, key);
382 res = trigramsMatchGraph(cache->graph, check);
383 pfree(check);
385 else if (ISALLTRUE(key))
386 { /* non-leaf contains signature */
387 res = true;
389 else
390 { /* non-leaf contains signature */
391 int32 k,
392 tmp = 0,
393 len = ARRNELEM(qtrg);
394 trgm *ptr = GETARR(qtrg);
395 BITVECP sign = GETSIGN(key);
396 bool *check;
399 * GETBIT() tests may give false positives, due to limited
400 * size of the sign array. But since trigramsMatchGraph()
401 * implements a monotone boolean function, false positives
402 * in the check array can't lead to false negative answer.
403 * So we can apply trigramsMatchGraph despite uncertainty,
404 * and that usefully improves the quality of the search.
406 check = (bool *) palloc(len * sizeof(bool));
407 for (k = 0; k < len; k++)
409 CPTRGM(((char *) &tmp), ptr + k);
410 check[k] = GETBIT(sign, HASHVAL(tmp));
412 res = trigramsMatchGraph(cache->graph, check);
413 pfree(check);
416 else
418 /* trigram-free query must be rechecked everywhere */
419 res = true;
421 break;
422 default:
423 elog(ERROR, "unrecognized strategy number: %d", strategy);
424 res = false; /* keep compiler quiet */
425 break;
428 PG_RETURN_BOOL(res);
431 Datum
432 gtrgm_distance(PG_FUNCTION_ARGS)
434 GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
435 text *query = PG_GETARG_TEXT_P(1);
436 StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
438 /* Oid subtype = PG_GETARG_OID(3); */
439 bool *recheck = (bool *) PG_GETARG_POINTER(4);
440 TRGM *key = (TRGM *) DatumGetPointer(entry->key);
441 TRGM *qtrg;
442 float8 res;
443 Size querysize = VARSIZE(query);
444 char *cache = (char *) fcinfo->flinfo->fn_extra;
447 * Cache the generated trigrams across multiple calls with the same query.
449 if (cache == NULL ||
450 VARSIZE(cache) != querysize ||
451 memcmp(cache, query, querysize) != 0)
453 char *newcache;
455 qtrg = generate_trgm(VARDATA(query), querysize - VARHDRSZ);
457 newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
458 MAXALIGN(querysize) +
459 VARSIZE(qtrg));
461 memcpy(newcache, query, querysize);
462 memcpy(newcache + MAXALIGN(querysize), qtrg, VARSIZE(qtrg));
464 if (cache)
465 pfree(cache);
466 fcinfo->flinfo->fn_extra = newcache;
467 cache = newcache;
470 qtrg = (TRGM *) (cache + MAXALIGN(querysize));
472 switch (strategy)
474 case DistanceStrategyNumber:
475 case WordDistanceStrategyNumber:
476 *recheck = strategy == WordDistanceStrategyNumber;
477 if (GIST_LEAF(entry))
478 { /* all leafs contains orig trgm */
480 * Prevent gcc optimizing the sml variable using volatile
481 * keyword. Otherwise res can differ from the
482 * word_similarity_dist_op() function.
484 float4 volatile sml = cnt_sml(qtrg, key, *recheck);
485 res = 1.0 - sml;
487 else if (ISALLTRUE(key))
488 { /* all leafs contains orig trgm */
489 res = 0.0;
491 else
492 { /* non-leaf contains signature */
493 int32 count = cnt_sml_sign_common(qtrg, GETSIGN(key));
494 int32 len = ARRNELEM(qtrg);
496 res = (len == 0) ? -1.0 : 1.0 - ((float8) count) / ((float8) len);
498 break;
499 default:
500 elog(ERROR, "unrecognized strategy number: %d", strategy);
501 res = 0; /* keep compiler quiet */
502 break;
505 PG_RETURN_FLOAT8(res);
508 static int32
509 unionkey(BITVECP sbase, TRGM *add)
511 int32 i;
513 if (ISSIGNKEY(add))
515 BITVECP sadd = GETSIGN(add);
517 if (ISALLTRUE(add))
518 return 1;
520 LOOPBYTE
521 sbase[i] |= sadd[i];
523 else
525 trgm *ptr = GETARR(add);
526 int32 tmp = 0;
528 for (i = 0; i < ARRNELEM(add); i++)
530 CPTRGM(((char *) &tmp), ptr + i);
531 HASH(sbase, tmp);
534 return 0;
538 Datum
539 gtrgm_union(PG_FUNCTION_ARGS)
541 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
542 int32 len = entryvec->n;
543 int *size = (int *) PG_GETARG_POINTER(1);
544 BITVEC base;
545 int32 i;
546 int32 flag = 0;
547 TRGM *result;
549 MemSet((void *) base, 0, sizeof(BITVEC));
550 for (i = 0; i < len; i++)
552 if (unionkey(base, GETENTRY(entryvec, i)))
554 flag = ALLISTRUE;
555 break;
559 flag |= SIGNKEY;
560 len = CALCGTSIZE(flag, 0);
561 result = (TRGM *) palloc(len);
562 SET_VARSIZE(result, len);
563 result->flag = flag;
564 if (!ISALLTRUE(result))
565 memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
566 *size = len;
568 PG_RETURN_POINTER(result);
571 Datum
572 gtrgm_same(PG_FUNCTION_ARGS)
574 TRGM *a = (TRGM *) PG_GETARG_POINTER(0);
575 TRGM *b = (TRGM *) PG_GETARG_POINTER(1);
576 bool *result = (bool *) PG_GETARG_POINTER(2);
578 if (ISSIGNKEY(a))
579 { /* then b also ISSIGNKEY */
580 if (ISALLTRUE(a) && ISALLTRUE(b))
581 *result = true;
582 else if (ISALLTRUE(a))
583 *result = false;
584 else if (ISALLTRUE(b))
585 *result = false;
586 else
588 int32 i;
589 BITVECP sa = GETSIGN(a),
590 sb = GETSIGN(b);
592 *result = true;
593 LOOPBYTE
595 if (sa[i] != sb[i])
597 *result = false;
598 break;
603 else
604 { /* a and b ISARRKEY */
605 int32 lena = ARRNELEM(a),
606 lenb = ARRNELEM(b);
608 if (lena != lenb)
609 *result = false;
610 else
612 trgm *ptra = GETARR(a),
613 *ptrb = GETARR(b);
614 int32 i;
616 *result = true;
617 for (i = 0; i < lena; i++)
618 if (CMPTRGM(ptra + i, ptrb + i))
620 *result = false;
621 break;
626 PG_RETURN_POINTER(result);
629 static int32
630 sizebitvec(BITVECP sign)
632 int32 size = 0,
635 LOOPBYTE
636 size += number_of_ones[(unsigned char) sign[i]];
637 return size;
640 static int
641 hemdistsign(BITVECP a, BITVECP b)
643 int i,
644 diff,
645 dist = 0;
647 LOOPBYTE
649 diff = (unsigned char) (a[i] ^ b[i]);
650 dist += number_of_ones[diff];
652 return dist;
655 static int
656 hemdist(TRGM *a, TRGM *b)
658 if (ISALLTRUE(a))
660 if (ISALLTRUE(b))
661 return 0;
662 else
663 return SIGLENBIT - sizebitvec(GETSIGN(b));
665 else if (ISALLTRUE(b))
666 return SIGLENBIT - sizebitvec(GETSIGN(a));
668 return hemdistsign(GETSIGN(a), GETSIGN(b));
671 Datum
672 gtrgm_penalty(PG_FUNCTION_ARGS)
674 GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
675 GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
676 float *penalty = (float *) PG_GETARG_POINTER(2);
677 TRGM *origval = (TRGM *) DatumGetPointer(origentry->key);
678 TRGM *newval = (TRGM *) DatumGetPointer(newentry->key);
679 BITVECP orig = GETSIGN(origval);
681 *penalty = 0.0;
683 if (ISARRKEY(newval))
685 char *cache = (char *) fcinfo->flinfo->fn_extra;
686 TRGM *cachedVal = (TRGM *) (cache + MAXALIGN(sizeof(BITVEC)));
687 Size newvalsize = VARSIZE(newval);
688 BITVECP sign;
691 * Cache the sign data across multiple calls with the same newval.
693 if (cache == NULL ||
694 VARSIZE(cachedVal) != newvalsize ||
695 memcmp(cachedVal, newval, newvalsize) != 0)
697 char *newcache;
699 newcache = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
700 MAXALIGN(sizeof(BITVEC)) +
701 newvalsize);
703 makesign((BITVECP) newcache, newval);
705 cachedVal = (TRGM *) (newcache + MAXALIGN(sizeof(BITVEC)));
706 memcpy(cachedVal, newval, newvalsize);
708 if (cache)
709 pfree(cache);
710 fcinfo->flinfo->fn_extra = newcache;
711 cache = newcache;
714 sign = (BITVECP) cache;
716 if (ISALLTRUE(origval))
717 *penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
718 else
719 *penalty = hemdistsign(sign, orig);
721 else
722 *penalty = hemdist(origval, newval);
723 PG_RETURN_POINTER(penalty);
726 typedef struct
728 bool allistrue;
729 BITVEC sign;
730 } CACHESIGN;
732 static void
733 fillcache(CACHESIGN *item, TRGM *key)
735 item->allistrue = false;
736 if (ISARRKEY(key))
737 makesign(item->sign, key);
738 else if (ISALLTRUE(key))
739 item->allistrue = true;
740 else
741 memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
744 #define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
745 typedef struct
747 OffsetNumber pos;
748 int32 cost;
749 } SPLITCOST;
751 static int
752 comparecost(const void *a, const void *b)
754 if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
755 return 0;
756 else
757 return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
761 static int
762 hemdistcache(CACHESIGN *a, CACHESIGN *b)
764 if (a->allistrue)
766 if (b->allistrue)
767 return 0;
768 else
769 return SIGLENBIT - sizebitvec(b->sign);
771 else if (b->allistrue)
772 return SIGLENBIT - sizebitvec(a->sign);
774 return hemdistsign(a->sign, b->sign);
777 Datum
778 gtrgm_picksplit(PG_FUNCTION_ARGS)
780 GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
781 OffsetNumber maxoff = entryvec->n - 2;
782 GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
783 OffsetNumber k,
785 TRGM *datum_l,
786 *datum_r;
787 BITVECP union_l,
788 union_r;
789 int32 size_alpha,
790 size_beta;
791 int32 size_waste,
792 waste = -1;
793 int32 nbytes;
794 OffsetNumber seed_1 = 0,
795 seed_2 = 0;
796 OffsetNumber *left,
797 *right;
798 BITVECP ptr;
799 int i;
800 CACHESIGN *cache;
801 SPLITCOST *costvector;
803 /* cache the sign data for each existing item */
804 cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
805 for (k = FirstOffsetNumber; k <= maxoff; k = OffsetNumberNext(k))
806 fillcache(&cache[k], GETENTRY(entryvec, k));
808 /* now find the two furthest-apart items */
809 for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
811 for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
813 size_waste = hemdistcache(&(cache[j]), &(cache[k]));
814 if (size_waste > waste)
816 waste = size_waste;
817 seed_1 = k;
818 seed_2 = j;
823 /* just in case we didn't make a selection ... */
824 if (seed_1 == 0 || seed_2 == 0)
826 seed_1 = 1;
827 seed_2 = 2;
830 /* initialize the result vectors */
831 nbytes = (maxoff + 2) * sizeof(OffsetNumber);
832 v->spl_left = left = (OffsetNumber *) palloc(nbytes);
833 v->spl_right = right = (OffsetNumber *) palloc(nbytes);
834 v->spl_nleft = 0;
835 v->spl_nright = 0;
837 /* form initial .. */
838 if (cache[seed_1].allistrue)
840 datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
841 SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
842 datum_l->flag = SIGNKEY | ALLISTRUE;
844 else
846 datum_l = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0));
847 SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
848 datum_l->flag = SIGNKEY;
849 memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
851 if (cache[seed_2].allistrue)
853 datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
854 SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
855 datum_r->flag = SIGNKEY | ALLISTRUE;
857 else
859 datum_r = (TRGM *) palloc(CALCGTSIZE(SIGNKEY, 0));
860 SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
861 datum_r->flag = SIGNKEY;
862 memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
865 union_l = GETSIGN(datum_l);
866 union_r = GETSIGN(datum_r);
867 maxoff = OffsetNumberNext(maxoff);
868 fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
869 /* sort before ... */
870 costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
871 for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
873 costvector[j - 1].pos = j;
874 size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
875 size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
876 costvector[j - 1].cost = abs(size_alpha - size_beta);
878 qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
880 for (k = 0; k < maxoff; k++)
882 j = costvector[k].pos;
883 if (j == seed_1)
885 *left++ = j;
886 v->spl_nleft++;
887 continue;
889 else if (j == seed_2)
891 *right++ = j;
892 v->spl_nright++;
893 continue;
896 if (ISALLTRUE(datum_l) || cache[j].allistrue)
898 if (ISALLTRUE(datum_l) && cache[j].allistrue)
899 size_alpha = 0;
900 else
901 size_alpha = SIGLENBIT - sizebitvec(
902 (cache[j].allistrue) ? GETSIGN(datum_l) : GETSIGN(cache[j].sign)
905 else
906 size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
908 if (ISALLTRUE(datum_r) || cache[j].allistrue)
910 if (ISALLTRUE(datum_r) && cache[j].allistrue)
911 size_beta = 0;
912 else
913 size_beta = SIGLENBIT - sizebitvec(
914 (cache[j].allistrue) ? GETSIGN(datum_r) : GETSIGN(cache[j].sign)
917 else
918 size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
920 if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
922 if (ISALLTRUE(datum_l) || cache[j].allistrue)
924 if (!ISALLTRUE(datum_l))
925 MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
927 else
929 ptr = cache[j].sign;
930 LOOPBYTE
931 union_l[i] |= ptr[i];
933 *left++ = j;
934 v->spl_nleft++;
936 else
938 if (ISALLTRUE(datum_r) || cache[j].allistrue)
940 if (!ISALLTRUE(datum_r))
941 MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
943 else
945 ptr = cache[j].sign;
946 LOOPBYTE
947 union_r[i] |= ptr[i];
949 *right++ = j;
950 v->spl_nright++;
954 *right = *left = FirstOffsetNumber;
955 v->spl_ldatum = PointerGetDatum(datum_l);
956 v->spl_rdatum = PointerGetDatum(datum_r);
958 PG_RETURN_POINTER(v);