/*
   Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Written by Sergei A. Golubchik, who has a shared copyright to this code.
   Support for long options (my_getopt) added 22.5.2002 by Jani Tolonen. */
22 #include <my_getopt.h>
/*
  Forward declarations of the two option-handling helpers defined later in
  this file: get_one_option() is the callback handed to handle_options() in
  main(), and complain() is called from the get_one_option() fragments.
*/
25 static void complain(int val
);
26 static my_bool
get_one_option(int, const struct my_option
*, char *);
/*
  File-scope state shared by main() and the option handling.

  count, dump, lstats and query are tested together in main() to see whether
  any output mode was requested; count and dump are also checked by the
  complain() calls in get_one_option().
  verbose is bound to the --verbose entry of my_long_options and gates the
  progress counter in main().
  lengths[] has 256 slots; main() walks it with inx running from 0 to 255.

  NOTE(review): the statements that assign count/stats/dump/lstats/query are
  not visible in this excerpt -- presumably they live in get_one_option();
  confirm against the full file.
*/
28 static int count
=0, stats
=0, dump
=0, lstats
=0;
29 static my_bool verbose
;
30 static char *query
=NULL
;
31 static uint lengths
[256];
/* Size of the word buffers declared in main(): HA_FT_MAXBYTELEN plus 10 bytes. */
33 #define MAX_LEN (HA_FT_MAXBYTELEN+10)
/* With verbose set, main() prints the running total each time it is a multiple of this. */
34 #define HOW_OFTEN_TO_WRITE 10000
/*
  Command-line option table.  It is passed to handle_options() in main() and
  to my_print_help() / my_print_variables() in the help output.

  Every entry except --verbose is declared GET_NO_ARG / NO_ARG with null
  value pointers, so those options carry no value of their own.  --verbose is
  a GET_BOOL whose value and max-value pointers both refer to the file-scope
  variable verbose.  The last entry is all zeros (presumably the list
  terminator expected by my_getopt -- confirm).
*/
36 static struct my_option my_long_options
[] =
38 {"help", 'h', "Display help and exit.",
39 0, 0, 0, GET_NO_ARG
, NO_ARG
, 0, 0, 0, 0, 0, 0},
40 {"help", '?', "Synonym for -h.",
41 0, 0, 0, GET_NO_ARG
, NO_ARG
, 0, 0, 0, 0, 0, 0},
42 {"count", 'c', "Calculate per-word stats (counts and global weights).",
43 0, 0, 0, GET_NO_ARG
, NO_ARG
, 0, 0, 0, 0, 0, 0},
44 {"dump", 'd', "Dump index (incl. data offsets and word weights).",
45 0, 0, 0, GET_NO_ARG
, NO_ARG
, 0, 0, 0, 0, 0, 0},
46 {"length", 'l', "Report length distribution.",
47 0, 0, 0, GET_NO_ARG
, NO_ARG
, 0, 0, 0, 0, 0, 0},
48 {"stats", 's', "Report global stats.",
49 0, 0, 0, GET_NO_ARG
, NO_ARG
, 0, 0, 0, 0, 0, 0},
/* The only option stored directly into a variable by the option parser. */
50 {"verbose", 'v', "Be verbose.",
51 (uchar
**) &verbose
, (uchar
**) &verbose
, 0, GET_BOOL
, NO_ARG
, 0, 0, 0, 0, 0, 0},
52 { 0, 0, 0, 0, 0, 0, GET_NO_ARG
, NO_ARG
, 0, 0, 0, 0, 0, 0}
/*
  Program entry point: reads one FULLTEXT index of a MyISAM table and prints
  a dump and/or statistics about the words stored in it.

  After handle_options() has processed the switches, argv[0] is used as the
  table name (opened read-only with mi_open) and argv[1] as the index number
  (parsed base-10 with strtoll).  The index is then walked with mi_rnext()
  until it returns non-zero.

  NOTE(review): this excerpt has gaps.  The declarations of error, subkeys,
  weight, info and end, all braces, and the bodies of most conditionals are
  not visible here; comments below describe only the visible fragments.
*/
56 int main(int argc
,char *argv
[])
59 uint keylen
, keylen2
=0, inx
, doc_cnt
=0;
61 double gws
, min_gws
=0, avg_gws
=0;
/* Four word buffers of MAX_LEN bytes each. */
63 char buf
[MAX_LEN
], buf2
[MAX_LEN
], buf_maxlen
[MAX_LEN
], buf_min_gws
[MAX_LEN
];
64 ulong total
=0, maxlen
=0, uniq
=0, max_doc_cnt
=0;
65 struct { MI_INFO
*info
; } aio0
, *aio
=&aio0
; /* for GWS_IN_USE */
/* Parse the command line; get_one_option is the per-option callback. */
68 if ((error
= handle_options(&argc
, &argv
, my_long_options
, get_one_option
)))
/* True when none of count, dump, lstats or query was requested; the action
   taken in that case is not visible in this excerpt. */
72 if (!count
&& !dump
&& !lstats
&& !query
)
/* Index number: argv[1] parsed as a base-10 integer and narrowed to uint. */
83 inx
= (uint
) strtoll(argv
[1], &end
, 10);
88 init_key_cache(dflt_key_cache
,MI_KEY_BLOCK_LENGTH
,USE_BUFFER_INIT
, 0, 0);
/* Open the table named by argv[0] read-only. */
90 if (!(info
=mi_open(argv
[0], O_RDONLY
,
91 HA_OPEN_ABORT_IF_LOCKED
|HA_OPEN_FROM_SQL_LAYER
)))
/* Reject an index number that is out of range or whose key is not flagged
   HA_FULLTEXT. */
100 if ((inx
>= info
->s
->base
.keys
) ||
101 !(info
->s
->keyinfo
[inx
].flag
& HA_FULLTEXT
))
/* NOTE(review): inx is declared uint but is printed with %d. */
103 printf("Key %d in table %s is not a FULLTEXT key\n", inx
, info
->filename
);
107 mi_lock_database(info
, F_EXTRA_LCK
);
/* State set up before the first mi_rnext() call. */
109 info
->lastpos
= HA_OFFSET_ERROR
;
110 info
->update
|= HA_STATE_PREV_FOUND
;
/* Walk index inx entry by entry until mi_rnext() returns non-zero. */
112 while (!(error
=mi_rnext(info
,NULL
,inx
)))
/* The first byte of lastkey is taken as the word length. */
114 keylen
=*(info
->lastkey
);
/* The bytes that follow the word (lastkey+keylen+1) are read both as a
   signed integer (subkeys) and as a float (weight). */
116 subkeys
=ft_sintXkorr(info
->lastkey
+keylen
+1);
118 ft_floatXget(weight
, info
->lastkey
+keylen
+1);
/* Copy the keylen bytes starting at lastkey+1 into buf as the current word.
   NOTE(review): both a bounded (snprintf) and an unbounded (sprintf) form are
   present -- presumably alternatives under a preprocessor conditional that
   is not visible here; confirm.  The sprintf form has no length limit. */
121 snprintf(buf
,MAX_LEN
,"%.*s",(int) keylen
,info
->lastkey
+1);
123 sprintf(buf
,"%.*s",(int) keylen
,info
->lastkey
+1);
/* Case-convert buf in place with the default charset (presumably to lower
   case, going by the function name -- confirm). */
125 my_casedn_str(default_charset_info
,buf
);
/* Non-zero when buf differs from buf2 (presumably the previous word -- the
   assignment to buf2 is not visible here). */
131 if (strcmp(buf
, buf2
))
/* gws takes the value of GWS_IN_USE and is added to the avg_gws sum. */
136 avg_gws
+=gws
=GWS_IN_USE
;
138 printf("%9u %20.7f %s\n",doc_cnt
,gws
,buf2
);
142 strmov(buf_maxlen
, buf2
);
144 if (max_doc_cnt
< doc_cnt
)
147 strmov(buf_min_gws
, buf2
);
/* doc_cnt grows by 1 when subkeys >= 0, otherwise by -subkeys. */
155 doc_cnt
+= (subkeys
>= 0 ? 1 : -subkeys
);
/* Two dump line formats: position, weight and word, or position, -subkeys
   and word.  The condition choosing between them is not visible here. */
160 printf("%9lx %20.7f %s\n", (long) info
->lastpos
,weight
,buf
);
162 printf("%9lx => %17d %s\n",(long) info
->lastpos
,-subkeys
,buf
);
/* Progress counter, printed when verbose is set and total is a multiple of
   HOW_OFTEN_TO_WRITE.
   NOTE(review): total is declared ulong but is printed with %ld. */
164 if (verbose
&& (total
%HOW_OFTEN_TO_WRITE
)==0)
165 printf("%10ld\r",total
);
167 mi_lock_database(info
, F_UNLCK
);
/* The statements below repeat the per-word bookkeeping seen inside the loop
   (presumably to account for the last word after the loop ends -- confirm). */
174 avg_gws
+=gws
=GWS_IN_USE
;
176 printf("%9u %20.7f %s\n",doc_cnt
,gws
,buf2
);
180 strmov(buf_maxlen
, buf2
);
182 if (max_doc_cnt
< doc_cnt
)
185 strmov(buf_min_gws
, buf2
);
/* Scan over the 256 length slots; the visible test compares count, cast to
   ulong, with half of total.  inx after this scan is what the summary below
   prints as "Median length". */
194 for (inx
=0;inx
<256;inx
++)
197 if ((ulong
) count
>= total
/2)
/* Summary report.
   NOTE(review): the first argument is cast to (long) but printed with %lu.
   NOTE(review): avg_gws/uniq has no visible guard for uniq == 0. */
200 printf("Total rows: %lu\nTotal words: %lu\n"
201 "Unique words: %lu\nLongest word: %lu chars (%s)\n"
202 "Median length: %u\n"
203 "Average global weight: %f\n"
204 "Most common word: %lu times, weight: %f (%s)\n",
205 (long) info
->state
->records
, total
, uniq
, maxlen
, buf_maxlen
,
206 inx
, avg_gws
/uniq
, max_doc_cnt
, min_gws
, buf_min_gws
);
/* Length distribution: one output line per slot for which the visible test
   (count && lengths[inx]) holds.  The trailing printf arguments are cut off
   in this excerpt. */
211 for (inx
=0; inx
<256; inx
++)
214 if (count
&& lengths
[inx
])
215 printf("%3u: %10lu %5.2f%% %20lu %4.1f%%\n", inx
,
216 (ulong
) lengths
[inx
],100.0*lengths
[inx
]/total
,(ulong
) count
,
/* Any error other than HA_ERR_END_OF_FILE is reported; the value printed is
   my_errno, not error. */
222 if (error
&& error
!= HA_ERR_END_OF_FILE
)
223 printf("got error %d\n",my_errno
);
/*
  Per-option callback passed to handle_options() in main().

  optid identifies the option being processed; opt and argument are marked
  unused.  The visible fragments call complain(count || query) and
  complain(dump || query), i.e. they hand complain() a non-zero value when
  one of the named modes is already set.

  NOTE(review): the return-type line, the dispatch on optid and the
  assignments to the mode flags are not visible in this excerpt.
*/
231 get_one_option(int optid
, const struct my_option
*opt
__attribute__((unused
)),
232 char *argument
__attribute__((unused
)))
237 complain(count
|| query
);
245 complain(dump
|| query
);
/*
  Help output, bracketed by the help_start.h / help_end.h includes: prints
  the one-line usage string, then the option descriptions and the variable
  values derived from my_long_options.

  NOTE(review): the header of the enclosing function is not visible in this
  excerpt (presumably a usage() routine -- confirm).
*/
258 #include <help_start.h>
262 printf("Use: myisam_ftdump <table_name> <index_num>\n");
263 my_print_help(my_long_options
);
264 my_print_variables(my_long_options
);
265 NETWARE_SET_SCREEN_MODE(1);
269 #include <help_end.h>
/*
  Reports an invalid combination of command-line options by printing
  "You cannot use these options together!".

  val is the condition computed by the caller (see the complain() calls in
  get_one_option()).
  NOTE(review): the lines between the header and the printf are not visible
  in this excerpt; presumably the message is printed only when val is
  non-zero and the program then stops -- confirm against the full file.
*/
271 static void complain(int val
) /* Kinda assert :-) */
275 printf("You cannot use these options together!\n");