16863 implement C23 stdbit.h
[illumos-gate.git] / usr / src / lib / libsqlite / tool / spaceanal.tcl
blob6fc3261e45a0b268f42c869bf411304f43cd2602
1 # Run this TCL script using "testfixture" in order get a report that shows
2 # how much disk space is used by a particular data to actually store data
3 # versus how much space is unused.
6 # Get the name of the database to analyze
8 if {[llength $argv]!=1} {
9 puts stderr "Usage: $argv0 database-name"
10 exit 1
12 set file_to_analyze [lindex $argv 0]
13 if {![file exists $file_to_analyze]} {
14 puts stderr "No such file: $file_to_analyze"
15 exit 1
17 if {![file readable $file_to_analyze]} {
18 puts stderr "File is not readable: $file_to_analyze"
19 exit 1
21 if {[file size $file_to_analyze]<2048} {
22 puts stderr "Empty or malformed database: $file_to_analyze"
23 exit 1
26 # Open the database
28 sqlite db [lindex $argv 0]
29 set DB [btree_open [lindex $argv 0]]
31 # In-memory database for collecting statistics
33 sqlite mem :memory:
34 set tabledef\
35 {CREATE TABLE space_used(
36 name clob, -- Name of a table or index in the database file
37 tblname clob, -- Name of associated table
38 is_index boolean, -- TRUE if it is an index, false for a table
39 nentry int, -- Number of entries in the BTree
40 payload int, -- Total amount of data stored in this table or index
41 mx_payload int, -- Maximum payload size
42 n_ovfl int, -- Number of entries that overflow
43 pri_pages int, -- Number of primary pages used
44 ovfl_pages int, -- Number of overflow pages used
45 pri_unused int, -- Number of unused bytes on primary pages
46 ovfl_unused int -- Number of unused bytes on overflow pages
47 );}
48 mem eval $tabledef
50 # This query will be used to find the root page number for every index and
51 # table in the database.
53 set sql {
54 SELECT name, tbl_name, type, rootpage
55 FROM sqlite_master WHERE type IN ('table','index')
56 UNION ALL
57 SELECT 'sqlite_master', 'sqlite_master', 'table', 2
58 ORDER BY 1
61 # Analyze every table in the database, one at a time.
63 foreach {name tblname type rootpage} [db eval $sql] {
64 puts stderr "Analyzing $name..."
65 set cursor [btree_cursor $DB $rootpage 0]
66 set go [btree_first $cursor]
67 set size 0
68 catch {unset pg_used}
69 set unused_ovfl 0
70 set n_overflow 0
71 set cnt_ovfl 0
72 set n_entry 0
73 set mx_size 0
74 set pg_used($rootpage) 1016
75 while {$go==0} {
76 incr n_entry
77 set payload [btree_payload_size $cursor]
78 incr size $payload
79 set stat [btree_cursor_dump $cursor]
80 set pgno [lindex $stat 0]
81 set freebytes [lindex $stat 4]
82 set pg_used($pgno) $freebytes
83 if {$payload>236} {
84 # if {[lindex $stat 8]==0} {error "overflow is empty with $payload"}
85 set n [expr {($payload-236+1019)/1020}]
86 incr n_overflow $n
87 incr cnt_ovfl
88 incr unused_ovfl [expr {$n*1020+236-$payload}]
89 } else {
90 # if {[lindex $stat 8]!=0} {error "overflow not empty with $payload"}
92 if {$payload>$mx_size} {set mx_size $payload}
93 set go [btree_next $cursor]
95 btree_close_cursor $cursor
96 set n_primary [llength [array names pg_used]]
97 set unused_primary 0
98 foreach x [array names pg_used] {incr unused_primary $pg_used($x)}
99 regsub -all ' $name '' name
100 set sql "INSERT INTO space_used VALUES('$name'"
101 regsub -all ' $tblname '' tblname
102 append sql ",'$tblname',[expr {$type=="index"}],$n_entry"
103 append sql ",$size,$mx_size,$cnt_ovfl,"
104 append sql "$n_primary,$n_overflow,$unused_primary,$unused_ovfl);"
105 mem eval $sql
108 # Generate a single line of output in the statistics section of the
109 # report.
111 proc statline {title value {extra {}}} {
112 set len [string length $title]
113 set dots [string range {......................................} $len end]
114 set len [string length $value]
115 set sp2 [string range { } $len end]
116 if {$extra ne ""} {
117 set extra " $extra"
119 puts "$title$dots $value$sp2$extra"
122 # Generate a formatted percentage value for $num/$denom
124 proc percent {num denom} {
125 if {$denom==0.0} {return ""}
126 set v [expr {$num*100.0/$denom}]
127 if {$v>1.0 && $v<99.0} {
128 return [format %4.1f%% $v]
129 } elseif {$v<0.1 || $v>99.9} {
130 return [format %6.3f%% $v]
131 } else {
132 return [format %5.2f%% $v]
136 # Generate a subreport that covers some subset of the database.
137 # the $where clause determines which subset to analyze.
139 proc subreport {title where} {
140 set hit 0
141 mem eval "SELECT sum(nentry) AS nentry, \
142 sum(payload) AS payload, \
143 sum(CASE is_index WHEN 1 THEN 0 ELSE payload-4*nentry END) \
144 AS data, \
145 max(mx_payload) AS mx_payload, \
146 sum(n_ovfl) as n_ovfl, \
147 sum(pri_pages) AS pri_pages, \
148 sum(ovfl_pages) AS ovfl_pages, \
149 sum(pri_unused) AS pri_unused, \
150 sum(ovfl_unused) AS ovfl_unused \
151 FROM space_used WHERE $where" {} {set hit 1}
152 if {!$hit} {return 0}
153 puts ""
154 set len [string length $title]
155 incr len 5
156 set stars "***********************************"
157 append stars $stars
158 set stars [string range $stars $len end]
159 puts "*** $title $stars"
160 puts ""
161 statline "Percentage of total database" \
162 [percent [expr {$pri_pages+$ovfl_pages}] $::file_pgcnt]
163 statline "Number of entries" $nentry
164 set storage [expr {($pri_pages+$ovfl_pages)*1024}]
165 statline "Bytes of storage consumed" $storage
166 statline "Bytes of payload" $payload [percent $payload $storage]
167 statline "Bytes of data" $data [percent $data $storage]
168 set key [expr {$payload-$data}]
169 statline "Bytes of key" $key [percent $key $storage]
170 set avgpay [expr {$nentry>0?$payload/$nentry:0}]
171 statline "Average payload per entry" $avgpay
172 set avgunused [expr {$nentry>0?($pri_unused+$ovfl_unused)/$nentry:0}]
173 statline "Average unused bytes per entry" $avgunused
174 statline "Average fanout" \
175 [format %.2f [expr {$pri_pages==0?0:($nentry+0.0)/$pri_pages}]]
176 statline "Maximum payload per entry" $mx_payload
177 statline "Entries that use overflow" $n_ovfl [percent $n_ovfl $nentry]
178 statline "Total pages used" [set allpgs [expr {$pri_pages+$ovfl_pages}]]
179 statline "Primary pages used" $pri_pages ;# [percent $pri_pages $allpgs]
180 statline "Overflow pages used" $ovfl_pages ;# [percent $ovfl_pages $allpgs]
181 statline "Unused bytes on primary pages" $pri_unused \
182 [percent $pri_unused [expr {$pri_pages*1024}]]
183 statline "Unused bytes on overflow pages" $ovfl_unused \
184 [percent $ovfl_unused [expr {$ovfl_pages*1024}]]
185 set allunused [expr {$ovfl_unused+$pri_unused}]
186 statline "Unused bytes on all pages" $allunused \
187 [percent $allunused [expr {$allpgs*1024}]]
188 return 1
191 # Output summary statistics:
193 puts "/** Disk-Space Utilization Report For $file_to_analyze"
194 puts "*** As of [clock format [clock seconds] -format {%Y-%b-%d %H:%M:%S}]"
195 puts ""
196 set fsize [file size [lindex $argv 0]]
197 set file_pgcnt [expr {$fsize/1024}]
198 set usedcnt [mem eval {SELECT sum(pri_pages+ovfl_pages) FROM space_used}]
199 set freecnt [expr {$file_pgcnt-$usedcnt-1}]
200 set freecnt2 [lindex [btree_get_meta $DB] 0]
201 statline {Pages in the whole file (measured)} $file_pgcnt
202 set file_pgcnt2 [expr {$usedcnt+$freecnt2+1}]
203 statline {Pages in the whole file (calculated)} $file_pgcnt2
204 statline {Pages that store data} $usedcnt [percent $usedcnt $file_pgcnt]
205 statline {Pages on the freelist (per header)}\
206 $freecnt2 [percent $freecnt2 $file_pgcnt]
207 statline {Pages on the freelist (calculated)}\
208 $freecnt [percent $freecnt $file_pgcnt]
209 statline {Header pages} 1 [percent 1 $file_pgcnt]
211 set ntable [db eval {SELECT count(*)+1 FROM sqlite_master WHERE type='table'}]
212 statline {Number of tables in the database} $ntable
213 set nindex [db eval {SELECT count(*) FROM sqlite_master WHERE type='index'}]
214 set autoindex [db eval {SELECT count(*) FROM sqlite_master
215 WHERE type='index' AND name LIKE '(% autoindex %)'}]
216 set manindex [expr {$nindex-$autoindex}]
217 statline {Number of indices} $nindex
218 statline {Number of named indices} $manindex [percent $manindex $nindex]
219 statline {Automatically generated indices} $autoindex \
220 [percent $autoindex $nindex]
222 set bytes_data [mem eval "SELECT sum(payload-4*nentry) FROM space_used
223 WHERE NOT is_index AND name!='sqlite_master'"]
224 set total_payload [mem eval "SELECT sum(payload) FROM space_used"]
225 statline "Size of the file in bytes" $fsize
226 statline "Bytes of payload stored" $total_payload \
227 [percent $total_payload $fsize]
228 statline "Bytes of user data stored" $bytes_data \
229 [percent $bytes_data $fsize]
231 # Output table rankings
233 puts ""
234 puts "*** Page counts for all tables with their indices ********************"
235 puts ""
236 mem eval {SELECT tblname, count(*) AS cnt, sum(pri_pages+ovfl_pages) AS size
237 FROM space_used GROUP BY tblname ORDER BY size DESC, tblname} {} {
238 statline [string toupper $tblname] $size [percent $size $file_pgcnt]
241 # Output subreports
243 if {$nindex>0} {
244 subreport {All tables and indices} 1
246 subreport {All tables} {NOT is_index}
247 if {$nindex>0} {
248 subreport {All indices} {is_index}
250 foreach tbl [mem eval {SELECT name FROM space_used WHERE NOT is_index
251 ORDER BY name}] {
252 regsub ' $tbl '' qn
253 set name [string toupper $tbl]
254 set n [mem eval "SELECT count(*) FROM space_used WHERE tblname='$qn'"]
255 if {$n>1} {
256 subreport "Table $name and all its indices" "tblname='$qn'"
257 subreport "Table $name w/o any indices" "name='$qn'"
258 subreport "Indices of table $name" "tblname='$qn' AND is_index"
259 } else {
260 subreport "Table $name" "name='$qn'"
264 # Output instructions on what the numbers above mean.
266 puts {
267 *** Definitions ******************************************************
269 Number of pages in the whole file
271 The number of 1024-byte pages that go into forming the complete database
273 Pages that store data
275 The number of pages that store data, either as primary B*Tree pages or
276 as overflow pages. The number at the right is the data pages divided by
277 the total number of pages in the file.
279 Pages on the freelist
281 The number of pages that are not currently in use but are reserved for
282 future use. The percentage at the right is the number of freelist pages
283 divided by the total number of pages in the file.
285 Header pages
287 The number of pages of header overhead in the database. This value is
288 always 1. The percentage at the right is the number of header pages
289 divided by the total number of pages in the file.
291 Number of tables in the database
293 The number of tables in the database, including the SQLITE_MASTER table
294 used to store schema information.
296 Number of indices
298 The total number of indices in the database.
300 Number of named indices
302 The number of indices created using an explicit CREATE INDEX statement.
304 Automatically generated indices
306 The number of indices used to implement PRIMARY KEY or UNIQUE constraints
307 on tables.
309 Size of the file in bytes
311 The total amount of disk space used by the entire database files.
313 Bytes of payload stored
315 The total number of bytes of payload stored in the database. Payload
316 includes both key and data. The content of the SQLITE_MASTER table is
317 counted when computing this number. The percentage at the right shows
318 the payload divided by the total file size.
320 Bytes of user data stored
322 The total number of bytes of data stored in the database, not counting
323 the database schema information stored in the SQLITE_MASTER table. The
324 percentage at the right is the user data size divided by the total file
325 size.
327 Percentage of total database
329 The amount of the complete database file that is devoted to storing
330 information described by this category.
332 Number of entries
334 The total number of B*Tree key/value pairs stored under this category.
336 Bytes of storage consumed
338 The total amount of disk space required to store all B*Tree entries
339 under this category. The is the total number of pages used times
340 the pages size (1024).
342 Bytes of payload
344 The amount of payload stored under this category. Payload is the sum
345 of keys and data. Each table entry has 4 bytes of key and an arbitrary
346 amount of data. Each index entry has 4 or more bytes of key and no
347 data. The percentage at the right is the bytes of payload divided by
348 the bytes of storage consumed.
350 Bytes of data
352 The amount of data stored under this category. The data space reported
353 includes formatting information such as nul-terminators and field-lengths
354 that are stored with the data. The percentage at the right is the bytes
355 of data divided by bytes of storage consumed.
357 Bytes of key
359 The sum of the sizes of all keys under this category. The percentage at
360 the right is the bytes of key divided by the bytes of storage consumed.
362 Average payload per entry
364 The average amount of payload on each entry. This is just the bytes of
365 payload divided by the number of entries.
367 Average unused bytes per entry
369 The average amount of free space remaining on all pages under this
370 category on a per-entry basis. This is the number of unused bytes on
371 all pages divided by the number of entries.
373 Maximum payload per entry
375 The largest payload size of any entry.
377 Entries that use overflow
379 Up to 236 bytes of payload for each entry are stored directly in the
380 primary B*Tree page. Any additional payload is stored on a linked list
381 of overflow pages. This is the number of entries that exceed 236 bytes
382 in size. The value to the right is the number of entries that overflow
383 divided by the total number of entries.
385 Total pages used
387 This is the number of 1024 byte pages used to hold all information in
388 the current category. This is the sum of primary and overflow pages.
390 Primary pages used
392 This is the number of primary B*Tree pages used.
394 Overflow pages used
396 The total number of overflow pages used for this category.
398 Unused bytes on primary pages
400 The total number of bytes of unused space on all primary pages. The
401 percentage at the right is the number of unused bytes divided by the
402 total number of bytes on primary pages.
404 Unused bytes on overflow pages
406 The total number of bytes of unused space on all overflow pages. The
407 percentage at the right is the number of unused bytes divided by the
408 total number of bytes on overflow pages.
410 Unused bytes on all pages
412 The total number of bytes of unused space on all primary and overflow
413 pages. The percentage at the right is the number of unused bytes
414 divided by the total number of bytes.
417 # Output the database
419 puts "**********************************************************************"
420 puts "The entire text of this report can be sourced into any SQL database"
421 puts "engine for further analysis. All of the text above is an SQL comment."
422 puts "The data used to generate this report follows:"
423 puts "*/"
424 puts "BEGIN;"
425 puts $tabledef
426 unset -nocomplain x
427 mem eval {SELECT * FROM space_used} x {
428 puts -nonewline "INSERT INTO space_used VALUES("
429 regsub ' $x(name) '' qn
430 regsub ' $x(tblname) '' qtn
431 puts -nonewline "'$qn','$qtn',"
432 puts -nonewline "$x(is_index),$x(nentry),$x(payload),$x(mx_payload),"
433 puts -nonewline "$x(n_ovfl),$x(pri_pages),$x(ovfl_pages),$x(pri_unused),"
434 puts "$x(ovfl_unused));"
436 puts "COMMIT;"