mySQL 5.0.11 sources for tomato
[tomato.git] / release / src / router / mysql / sql / uniques.cc
blobb2a2ef0eeca2fbfeb0faaa045081b06f8a21c15e
1 /*
2 Copyright (c) 2001-2003, 2005-2007 MySQL AB, 2009 Sun Microsystems, Inc.
3 Use is subject to license terms.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; version 2 of the License.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 Function to handle quick removal of duplicates
21 This code is used when doing multi-table deletes to find the rows in
22 reference tables that need to be deleted.
24 The basic idea is as follows:
26 Store first all strings in a binary tree, ignoring duplicates.
27 When the tree uses more memory than 'max_heap_table_size',
28 write the tree (in sorted order) out to disk and start with a new tree.
29 When all data has been generated, merge the trees (removing any found
30 duplicates).
32 The unique entries will be returned in sort order, to ensure that we do the
33 deletes in disk order.
36 #include "mysql_priv.h"
37 #include "sql_sort.h"
40 int unique_write_to_file(uchar* key, element_count count, Unique *unique)
43 Use unique->size (size of element stored in the tree) and not
44 unique->tree.size_of_element. The latter is different from unique->size
45 when tree implementation chooses to store pointer to key in TREE_ELEMENT
46 (instead of storing the element itself there)
48 return my_b_write(&unique->file, key, unique->size) ? 1 : 0;
51 int unique_write_to_ptrs(uchar* key, element_count count, Unique *unique)
53 memcpy(unique->record_pointers, key, unique->size);
54 unique->record_pointers+=unique->size;
55 return 0;
58 Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
59 uint size_arg, ulonglong max_in_memory_size_arg)
60 :max_in_memory_size(max_in_memory_size_arg), size(size_arg), elements(0)
62 my_b_clear(&file);
63 init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func, 0,
64 NULL, comp_func_fixed_arg);
65 /* If the following fail's the next add will also fail */
66 my_init_dynamic_array(&file_ptrs, sizeof(BUFFPEK), 16, 16);
68 If you change the following, change it in get_max_elements function, too.
70 max_elements= (ulong) (max_in_memory_size /
71 ALIGN_SIZE(sizeof(TREE_ELEMENT)+size));
72 VOID(open_cached_file(&file, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE,
73 MYF(MY_WME)));
/*
  Approximate log2(x!) via Stirling's formula:

    x! ~= sqrt(2*M_PI*x) * (x/M_E)^x

  hence

    log2(x!) = log(x!)/log(2)
             = (log(2*M_PI*x)/2 + x*log(x/M_E)) / log(2).
*/

inline double log2_n_fact(double x)
{
  double ln_fact= log(2*M_PI*x)/2 + x*log(x/M_E);
  return ln_fact / M_LN2;
}
99 Calculate cost of merge_buffers function call for given sequence of
100 input stream lengths and store the number of rows in result stream in *last.
102 SYNOPSIS
103 get_merge_buffers_cost()
104 buff_elems Array of #s of elements in buffers
105 elem_size Size of element stored in buffer
106 first Pointer to first merged element size
107 last Pointer to last merged element size
109 RETURN
110 Cost of merge_buffers operation in disk seeks.
112 NOTES
113 It is assumed that no rows are eliminated during merge.
114 The cost is calculated as
116 cost(read_and_write) + cost(merge_comparisons).
118 All bytes in the sequences is read and written back during merge so cost
119 of disk io is 2*elem_size*total_buf_elems/IO_SIZE (2 is for read + write)
121 For comparisons cost calculations we assume that all merged sequences have
122 the same length, so each of total_buf_size elements will be added to a sort
123 heap with (n_buffers-1) elements. This gives the comparison cost:
125 total_buf_elems* log2(n_buffers) / TIME_FOR_COMPARE_ROWID;
128 static double get_merge_buffers_cost(uint *buff_elems, uint elem_size,
129 uint *first, uint *last)
131 uint total_buf_elems= 0;
132 for (uint *pbuf= first; pbuf <= last; pbuf++)
133 total_buf_elems+= *pbuf;
134 *last= total_buf_elems;
136 size_t n_buffers= last - first + 1;
138 /* Using log2(n)=log(n)/log(2) formula */
139 return 2*((double)total_buf_elems*elem_size) / IO_SIZE +
140 total_buf_elems*log((double) n_buffers) / (TIME_FOR_COMPARE_ROWID * M_LN2);
/*
  Calculate cost of merging buffers into one in Unique::get, i.e. calculate
  how long (in terms of disk seeks) the two calls
    merge_many_buffs(...);
    merge_buffers(...);
  will take.

  SYNOPSIS
    get_merge_many_buffs_cost()
      buffer        buffer space for temporary data, at least
                    Unique::get_cost_calc_buff_size bytes
      maxbuffer     # of full buffers
      max_n_elems   # of elements in first maxbuffer buffers
      last_n_elems  # of elements in last buffer
      elem_size     size of buffer element

  NOTES
    maxbuffer+1 buffers are merged, where first maxbuffer buffers contain
    max_n_elems elements each and last buffer contains last_n_elems elements.

    The current implementation does a dumb simulation of merge_many_buffs
    function actions.

  RETURN
    Cost of merge in disk seeks.
*/

static double get_merge_many_buffs_cost(uint *buffer,
                                        uint maxbuffer, uint max_n_elems,
                                        uint last_n_elems, int elem_size)
{
  register int i;
  double total_cost= 0.0;
  uint *buff_elems= buffer; /* #s of elements in each of merged sequences */

  /*
    Set initial state: first maxbuffer sequences contain max_n_elems elements
    each, last sequence contains last_n_elems elements.
  */
  for (i = 0; i < (int)maxbuffer; i++)
    buff_elems[i]= max_n_elems;
  buff_elems[maxbuffer]= last_n_elems;

  /*
    Do it exactly as merge_many_buff function does, calling
    get_merge_buffers_cost to get cost of merge_buffers.
    Each pass merges runs of MERGEBUFF sequences until at most
    MERGEBUFF2-1 sequences remain; get_merge_buffers_cost also collapses
    the merged element counts in buff_elems as a real merge would.
  */
  if (maxbuffer >= MERGEBUFF2)
  {
    while (maxbuffer >= MERGEBUFF2)
    {
      uint lastbuff= 0;
      /* Full merges of MERGEBUFF sequences each */
      for (i = 0; i <= (int) maxbuffer - MERGEBUFF*3/2; i += MERGEBUFF)
      {
        total_cost+=get_merge_buffers_cost(buff_elems, elem_size,
                                           buff_elems + i,
                                           buff_elems + i + MERGEBUFF-1);
        lastbuff++;
      }
      /* Merge the remaining tail of sequences in one call */
      total_cost+=get_merge_buffers_cost(buff_elems, elem_size,
                                         buff_elems + i,
                                         buff_elems + maxbuffer);
      maxbuffer= lastbuff;
    }
  }

  /* Simulate final merge_buff call. */
  total_cost += get_merge_buffers_cost(buff_elems, elem_size,
                                       buff_elems, buff_elems + maxbuffer);
  return total_cost;
}
218 Calculate cost of using Unique for processing nkeys elements of size
219 key_size using max_in_memory_size memory.
221 SYNOPSIS
222 Unique::get_use_cost()
223 buffer space for temporary data, use Unique::get_cost_calc_buff_size
224 to get # bytes needed.
225 nkeys #of elements in Unique
226 key_size size of each elements in bytes
227 max_in_memory_size amount of memory Unique will be allowed to use
229 RETURN
230 Cost in disk seeks.
232 NOTES
233 cost(using_unique) =
234 cost(create_trees) + (see #1)
235 cost(merge) + (see #2)
236 cost(read_result) (see #3)
238 1. Cost of trees creation
239 For each Unique::put operation there will be 2*log2(n+1) elements
240 comparisons, where n runs from 1 tree_size (we assume that all added
241 elements are different). Together this gives:
243 n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!)
245 then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;
247 Total cost of creating trees:
248 (n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.
250 Approximate value of log2(N!) is calculated by log2_n_fact function.
252 2. Cost of merging.
253 If only one tree is created by Unique no merging will be necessary.
254 Otherwise, we model execution of merge_many_buff function and count
255 #of merges. (The reason behind this is that number of buffers is small,
256 while size of buffers is big and we don't want to loose precision with
257 O(x)-style formula)
259 3. If only one tree is created by Unique no disk io will happen.
260 Otherwise, ceil(key_len*n_keys) disk seeks are necessary. We assume
261 these will be random seeks.
264 double Unique::get_use_cost(uint *buffer, uint nkeys, uint key_size,
265 ulonglong max_in_memory_size)
267 ulong max_elements_in_tree;
268 ulong last_tree_elems;
269 int n_full_trees; /* number of trees in unique - 1 */
270 double result;
272 max_elements_in_tree= ((ulong) max_in_memory_size /
273 ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size));
275 n_full_trees= nkeys / max_elements_in_tree;
276 last_tree_elems= nkeys % max_elements_in_tree;
278 /* Calculate cost of creating trees */
279 result= 2*log2_n_fact(last_tree_elems + 1.0);
280 if (n_full_trees)
281 result+= n_full_trees * log2_n_fact(max_elements_in_tree + 1.0);
282 result /= TIME_FOR_COMPARE_ROWID;
284 DBUG_PRINT("info",("unique trees sizes: %u=%u*%lu + %lu", nkeys,
285 n_full_trees, n_full_trees?max_elements_in_tree:0,
286 last_tree_elems));
288 if (!n_full_trees)
289 return result;
292 There is more then one tree and merging is necessary.
293 First, add cost of writing all trees to disk, assuming that all disk
294 writes are sequential.
296 result += DISK_SEEK_BASE_COST * n_full_trees *
297 ceil(((double) key_size)*max_elements_in_tree / IO_SIZE);
298 result += DISK_SEEK_BASE_COST * ceil(((double) key_size)*last_tree_elems / IO_SIZE);
300 /* Cost of merge */
301 double merge_cost= get_merge_many_buffs_cost(buffer, n_full_trees,
302 max_elements_in_tree,
303 last_tree_elems, key_size);
304 if (merge_cost < 0.0)
305 return merge_cost;
307 result += merge_cost;
309 Add cost of reading the resulting sequence, assuming there were no
310 duplicate elements.
312 result += ceil((double)key_size*nkeys/IO_SIZE);
314 return result;
Unique::~Unique()
{
  /* Release all owned resources: temp merge file, tree, BUFFPEK array. */
  close_cached_file(&file);
  delete_tree(&tree);
  delete_dynamic(&file_ptrs);
}
325 /* Write tree to disk; clear tree */
326 bool Unique::flush()
328 BUFFPEK file_ptr;
329 elements+= tree.elements_in_tree;
330 file_ptr.count=tree.elements_in_tree;
331 file_ptr.file_pos=my_b_tell(&file);
333 if (tree_walk(&tree, (tree_walk_action) unique_write_to_file,
334 (void*) this, left_root_right) ||
335 insert_dynamic(&file_ptrs, (uchar*) &file_ptr))
336 return 1;
337 delete_tree(&tree);
338 return 0;
343 Clear the tree and the file.
344 You must call reset() if you want to reuse Unique after walk().
347 void
348 Unique::reset()
350 reset_tree(&tree);
352 If elements != 0, some trees were stored in the file (see how
353 flush() works). Note, that we can not count on my_b_tell(&file) == 0
354 here, because it can return 0 right after walk(), and walk() does not
355 reset any Unique member.
357 if (elements)
359 reset_dynamic(&file_ptrs);
360 reinit_io_cache(&file, WRITE_CACHE, 0L, 0, 1);
362 elements= 0;
366 The comparison function, passed to queue_init() in merge_walk() and in
367 merge_buffers() when the latter is called from Uniques::get() must
368 use comparison function of Uniques::tree, but compare members of struct
369 BUFFPEK.
372 C_MODE_START
374 static int buffpek_compare(void *arg, uchar *key_ptr1, uchar *key_ptr2)
376 BUFFPEK_COMPARE_CONTEXT *ctx= (BUFFPEK_COMPARE_CONTEXT *) arg;
377 return ctx->key_compare(ctx->key_compare_arg,
378 *((uchar **) key_ptr1), *((uchar **)key_ptr2));
381 C_MODE_END
/*
  DESCRIPTION

    Function is very similar to merge_buffers, but instead of writing sorted
    unique keys to the output file, it invokes walk_action for each key.
    This saves I/O if you need to pass through all unique keys only once.

  SYNOPSIS
    merge_walk()
  All params are 'IN' (but see comment for begin, end):
    merge_buffer       buffer to perform cached piece-by-piece loading
                       of trees; initially the buffer is empty
    merge_buffer_size  size of merge_buffer. Must be aligned with
                       key_length
    key_length         size of tree element; key_length * (end - begin)
                       must be less or equal than merge_buffer_size.
    begin              pointer to BUFFPEK struct for the first tree.
    end                pointer to BUFFPEK struct for the last tree;
                       end > begin and [begin, end) form a consecutive
                       range. BUFFPEKs structs in that range are used and
                       overwritten in merge_walk().
    walk_action        element visitor. Action is called for each unique
                       key.
    walk_action_arg    argument to walk action. Passed to it on each call.
    compare            elements comparison function
    compare_arg        comparison function argument
    file               file with all trees dumped. Trees in the file
                       must contain sorted unique values. Cache must be
                       initialized in read mode.
  RETURN VALUE
    0 ok
    <> 0 error
*/

static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
                       uint key_length, BUFFPEK *begin, BUFFPEK *end,
                       tree_walk_action walk_action, void *walk_action_arg,
                       qsort_cmp2 compare, void *compare_arg,
                       IO_CACHE *file)
{
  BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg };
  QUEUE queue;
  /* Reject a degenerate range or a buffer too small for one key per tree */
  if (end <= begin ||
      merge_buffer_size < (ulong) (key_length * (end - begin + 1)) ||
      init_queue(&queue, (uint) (end - begin), offsetof(BUFFPEK, key), 0,
                 buffpek_compare, &compare_context))
    return 1;
  /* we need space for one key when a piece of merge buffer is re-read */
  merge_buffer_size-= key_length;
  uchar *save_key_buff= merge_buffer + merge_buffer_size;
  /* Split the remaining buffer evenly between the (end-begin) trees */
  uint max_key_count_per_piece= (uint) (merge_buffer_size/(end-begin) /
                                        key_length);
  /* if piece_size is aligned reuse_freed_buffer will always hit */
  uint piece_size= max_key_count_per_piece * key_length;
  uint bytes_read;               /* to hold return value of read_to_buffer */
  BUFFPEK *top;
  int res= 1;
  /*
    Invariant: queue must contain top element from each tree, until a tree
    is not completely walked through.
    Here we're forcing the invariant, inserting one element from each tree
    to the queue.
  */
  for (top= begin; top != end; ++top)
  {
    top->base= merge_buffer + (top - begin) * piece_size;
    top->max_keys= max_key_count_per_piece;
    bytes_read= read_to_buffer(file, top, key_length);
    if (bytes_read == (uint) (-1))
      goto end;
    /* each flushed tree is non-empty, so the first read must return data */
    DBUG_ASSERT(bytes_read);
    queue_insert(&queue, (uchar *) top);
  }
  top= (BUFFPEK *) queue_top(&queue);
  while (queue.elements > 1)
  {
    /*
      Every iteration one element is removed from the queue, and one is
      inserted by the rules of the invariant. If two adjacent elements on
      the top of the queue are not equal, biggest one is unique, because all
      elements in each tree are unique. Action is applied only to unique
      elements.
    */
    void *old_key= top->key;
    /*
      read next key from the cache or from the file and push it to the
      queue; this gives new top.
    */
    top->key+= key_length;
    if (--top->mem_count)
      queue_replaced(&queue);
    else /* next piece should be read */
    {
      /* save old_key not to overwrite it in read_to_buffer */
      memcpy(save_key_buff, old_key, key_length);
      old_key= save_key_buff;
      bytes_read= read_to_buffer(file, top, key_length);
      if (bytes_read == (uint) (-1))
        goto end;
      else if (bytes_read > 0)      /* top->key, top->mem_count are reset */
        queue_replaced(&queue);     /* in read_to_buffer */
      else
      {
        /*
          Tree for old 'top' element is empty: remove it from the queue and
          give all its memory to the nearest tree.
        */
        queue_remove(&queue, 0);
        reuse_freed_buff(&queue, top, key_length);
      }
    }
    top= (BUFFPEK *) queue_top(&queue);
    /* new top has been obtained; if old top is unique, apply the action */
    if (compare(compare_arg, old_key, top->key))
    {
      if (walk_action(old_key, 1, walk_action_arg))
        goto end;
    }
  }
  /*
    Applying walk_action to the tail of the last tree: this is safe because
    either we had only one tree in the beginning, either we work with the
    last tree in the queue.
  */
  do
  {
    do
    {
      if (walk_action(top->key, 1, walk_action_arg))
        goto end;
      top->key+= key_length;
    }
    while (--top->mem_count);
    bytes_read= read_to_buffer(file, top, key_length);
    if (bytes_read == (uint) (-1))
      goto end;
  }
  while (bytes_read);
  res= 0;
end:
  delete_queue(&queue);
  return res;
}
530 DESCRIPTION
531 Walks consecutively through all unique elements:
532 if all elements are in memory, then it simply invokes 'tree_walk', else
533 all flushed trees are loaded to memory piece-by-piece, pieces are
534 sorted, and action is called for each unique value.
535 Note: so as merging resets file_ptrs state, this method can change
536 internal Unique state to undefined: if you want to reuse Unique after
537 walk() you must call reset() first!
538 SYNOPSIS
539 Unique:walk()
540 All params are 'IN':
541 action function-visitor, typed in include/my_tree.h
542 function is called for each unique element
543 arg argument for visitor, which is passed to it on each call
544 RETURN VALUE
545 0 OK
546 <> 0 error
549 bool Unique::walk(tree_walk_action action, void *walk_action_arg)
551 int res;
552 uchar *merge_buffer;
554 if (elements == 0) /* the whole tree is in memory */
555 return tree_walk(&tree, action, walk_action_arg, left_root_right);
557 /* flush current tree to the file to have some memory for merge buffer */
558 if (flush())
559 return 1;
560 if (flush_io_cache(&file) || reinit_io_cache(&file, READ_CACHE, 0L, 0, 0))
561 return 1;
562 if (!(merge_buffer= (uchar *) my_malloc((ulong) max_in_memory_size, MYF(0))))
563 return 1;
564 res= merge_walk(merge_buffer, (ulong) max_in_memory_size, size,
565 (BUFFPEK *) file_ptrs.buffer,
566 (BUFFPEK *) file_ptrs.buffer + file_ptrs.elements,
567 action, walk_action_arg,
568 tree.compare, tree.custom_arg, &file);
569 my_free((char*) merge_buffer, MYF(0));
570 return res;
/*
  Modify the TABLE element so that when one calls init_records()
  the rows will be read in priority order.
*/

bool Unique::get(TABLE *table)
{
  SORTPARAM sort_param;
  table->sort.found_records=elements+tree.elements_in_tree;

  if (my_b_tell(&file) == 0)
  {
    /* Whole tree is in memory;  Don't use disk if you don't need to */
    if ((record_pointers=table->sort.record_pointers= (uchar*)
         my_malloc(size * tree.elements_in_tree, MYF(0))))
    {
      (void) tree_walk(&tree, (tree_walk_action) unique_write_to_ptrs,
                       this, left_root_right);
      return 0;
    }
  }
  /* Not enough memory; Save the result to file && free memory used by tree */
  if (flush())
    return 1;

  IO_CACHE *outfile=table->sort.io_cache;
  BUFFPEK *file_ptr= (BUFFPEK*) file_ptrs.buffer;
  uint maxbuffer= file_ptrs.elements - 1;
  uchar *sort_buffer;
  my_off_t save_pos;
  bool error=1;

  /* Open cached file if it isn't open */
  /*
    NOTE(review): the previous value of table->sort.io_cache is
    unconditionally overwritten here — presumably callers guarantee it is
    not an owned, open cache at this point; confirm against callers.
  */
  outfile=table->sort.io_cache=(IO_CACHE*) my_malloc(sizeof(IO_CACHE),
                                MYF(MY_ZEROFILL));

  if (!outfile || (! my_b_inited(outfile) &&
      open_cached_file(outfile,mysql_tmpdir,TEMP_PREFIX,READ_RECORD_BUFFER,
                       MYF(MY_WME))))
    return 1;
  reinit_io_cache(outfile,WRITE_CACHE,0L,0,0);

  /* Set up a SORTPARAM describing fixed-size rows of 'size' bytes */
  bzero((char*) &sort_param,sizeof(sort_param));
  sort_param.max_rows= elements;
  sort_param.sort_form=table;
  sort_param.rec_length= sort_param.sort_length= sort_param.ref_length=
    size;
  sort_param.keys= (uint) (max_in_memory_size / sort_param.sort_length);
  sort_param.not_killable=1;

  /* One extra key slot: unique_buff holds the previous key for dedup */
  if (!(sort_buffer=(uchar*) my_malloc((sort_param.keys+1) *
                                       sort_param.sort_length,
                                       MYF(0))))
    return 1;
  sort_param.unique_buff= sort_buffer+(sort_param.keys*
                                       sort_param.sort_length);

  sort_param.compare= (qsort2_cmp) buffpek_compare;
  sort_param.cmp_context.key_compare= tree.compare;
  sort_param.cmp_context.key_compare_arg= tree.custom_arg;

  /* Merge the buffers to one file, removing duplicates */
  if (merge_many_buff(&sort_param,sort_buffer,file_ptr,&maxbuffer,&file))
    goto err;
  if (flush_io_cache(&file) ||
      reinit_io_cache(&file,READ_CACHE,0L,0,0))
    goto err;
  if (merge_buffers(&sort_param, &file, outfile, sort_buffer, file_ptr,
                    file_ptr, file_ptr+maxbuffer,0))
    goto err;
  error=0;
err:
  /* Common cleanup path for both success and failure */
  x_free(sort_buffer);
  if (flush_io_cache(outfile))
    error=1;

  /* Setup io_cache for reading */
  save_pos=outfile->pos_in_file;
  if (reinit_io_cache(outfile,READ_CACHE,0L,0,0))
    error=1;
  outfile->end_of_file=save_pos;
  return error;
}