scan: Fix a utf-8 bug for string length
[jimtcl.git] / binary.tcl
blobedc6eb158de7e5228588fac9b2edda7dd19a481c
1 # Implements the 'binary scan' and 'binary format' commands.
3 # (c) 2010 Steve Bennett <steveb@workware.net.au>
5 # See LICENCE in this directory for licensing.
7 package require pack
8 package require regexp
10 proc binary {cmd args} {
11 tailcall "binary $cmd" {*}$args
14 proc "binary format" {formatString args} {
15 set bitoffset 0
16 set result {}
17 # This RE is too unreliable...
18 foreach {conv t u n} [regexp -all -inline {([^[:space:]])(u)?([*0-9]*)} $formatString] {
19 switch -exact -- $t {
20 a -
21 A {
22 set value [binary::nextarg args]
23 set sn [string bytelength $value]
24 if {$n ne "*"} {
25 if {$n eq ""} {
26 set n 1
28 if {$n > $sn} {
29 # Need to pad the string with spaces or nulls
30 append value [string repeat [dict get {A " " a \x00} $t] $($n - $sn)]
32 } else {
33 set n $sn
35 if {$n} {
36 set bitoffset [pack result $value -str $(8 * $n) $bitoffset]
39 x {
40 if {$n eq "*"} {
41 return -code error {cannot use "*" in format string with "x"}
43 if {$n eq ""} {
44 set n 1
46 loop i 0 $n {
47 set bitoffset [pack result 0 -intbe 8 $bitoffset]
50 @ {
51 if {$n eq ""} {
52 return -code error {missing count for "@" field specifier}
54 if {$n eq "*"} {
55 set bitoffset $(8 * [string bytelength $result])
56 } else {
57 # May need to pad it out
58 set max [string bytelength $result]
59 append result [string repeat \x00 $($n - $max)]
60 set bitoffset $(8 * $n)
63 X {
64 if {$n eq "*"} {
65 set bitoffset 0
66 } elseif {$n eq ""} {
67 incr bitoffset -8
68 } else {
69 incr bitoffset $($n * -8)
71 if {$bitoffset < 0} {
72 set bitoffset 0
75 default {
76 if {![info exists ::binary::scalarinfo($t)]} {
77 return -code error "bad field specifier \"$t\""
80 # A scalar (integer or float) type
81 lassign $::binary::scalarinfo($t) type convtype size prefix
82 set value [binary::nextarg args]
84 if {$type in {bin hex}} {
85 set value [split $value {}]
87 set vn [llength $value]
88 if {$n eq "*"} {
89 set n $vn
90 } elseif {$n eq ""} {
91 set n 1
92 set value [list $value]
93 } elseif {$vn < $n} {
94 if {$type in {bin hex}} {
95 # Need to pad the list with zeros
96 lappend value {*}[lrepeat $($n - $vn) 0]
97 } else {
98 return -code error "number of elements in list does not match count"
100 } elseif {$vn > $n} {
101 # Need to truncate the list
102 set value [lrange $value 0 $n-1]
105 set convtype -$::binary::convtype($convtype)
107 foreach v $value {
108 set bitoffset [pack result $prefix$v $convtype $size $bitoffset]
110 # Now pad out with zeros to the end of the current byte
111 if {$bitoffset % 8} {
112 set bitoffset [pack result 0 $convtype $(8 - $bitoffset % 8) $bitoffset]
117 return $result
120 proc "binary scan" {value formatString {args varName}} {
121 # Pops the next arg from the front of the list and returns it.
122 # Throws an error if no more args
123 set bitoffset 0
124 set count 0
125 # This RE is too unreliable...
126 foreach {conv t u n} [regexp -all -inline {([^[:space:]])(u)?([*0-9]*)} $formatString] {
127 set rembytes $([string bytelength $value] - $bitoffset / 8)
128 switch -exact -- $t {
131 if {$n eq "*"} {
132 set n $rembytes
133 } elseif {$n eq ""} {
134 set n 1
136 if {$n > $rembytes} {
137 break
140 set var [binary::nextarg varName]
142 set result [unpack $value -str $bitoffset $($n * 8)]
143 incr bitoffset $([string bytelength $result] * 8)
144 if {$t eq "A"} {
145 set result [string trimright $result]
149 # Skip bytes
150 if {$n eq "*"} {
151 set n $rembytes
152 } elseif {$n eq ""} {
153 set n 1
155 if {$n > $rembytes} {
156 set n $rembytes
158 incr bitoffset $($n * 8)
159 continue
162 # Back up bytes
163 if {$n eq "*"} {
164 set bitoffset 0
165 continue
167 if {$n eq ""} {
168 set n 1
170 if {$n * 8 > $bitoffset} {
171 set bitoffset 0
172 continue
174 incr bitoffset -$($n * 8)
175 continue
178 if {$n eq ""} {
179 return -code error {missing count for "@" field specifier}
181 if {$n eq "*" || $n > $rembytes + $bitoffset / 8} {
182 incr bitoffset $($rembytes * 8)
183 } elseif {$n < 0} {
184 set bitoffset 0
185 } else {
186 set bitoffset $($n * 8)
188 continue
190 default {
191 if {![info exists ::binary::scalarinfo($t)]} {
192 return -code error "bad field specifier \"$t\""
194 # A scalar (integer or float) type
195 lassign $::binary::scalarinfo($t) type convtype size prefix
196 set var [binary::nextarg varName]
198 if {$n eq "*"} {
199 set n $($rembytes * 8 / $size)
200 } else {
201 if {$n eq ""} {
202 set n 1
205 if {$n * $size > $rembytes * 8} {
206 break
209 if {$type in {hex bin}} {
210 set u u
212 set convtype -$u$::binary::convtype($convtype)
214 set result {}
215 loop i 0 $n {
216 set v [unpack $value $convtype $bitoffset $size]
217 if {$type in {bin hex}} {
218 append result [lindex {0 1 2 3 4 5 6 7 8 9 a b c d e f} $v]
219 } else {
220 lappend result $v
222 incr bitoffset $size
224 # Now skip to the end of the current byte
225 if {$bitoffset % 8} {
226 incr bitoffset $(8 - ($bitoffset % 8))
230 uplevel 1 [list set $var $result]
231 incr count
233 return $count
236 # Pops the next arg from the front of the list and returns it.
237 # Throws an error if no more args
238 proc binary::nextarg {&arglist} {
239 if {[llength $arglist] == 0} {
240 return -level 2 -code error "not enough arguments for all format specifiers"
242 set arglist [lassign $arglist arg]
243 return $arg
246 set binary::scalarinfo {
247 c {int be 8}
248 s {int le 16}
249 t {int host 16}
250 S {int be 16}
251 i {int le 32}
252 I {int be 32}
253 n {int host 32}
254 w {int le 64}
255 W {int be 64}
256 m {int host 64}
257 h {hex le 4 0x}
258 H {hex be 4 0x}
259 b {bin le 1}
260 B {bin be 1}
261 r {float fle 32}
262 R {float fbe 32}
263 f {float fhost 32}
264 q {float fle 64}
265 Q {float fbe 64}
266 d {float fhost 64}
268 set binary::convtype {
269 be intbe
270 le intle
271 fbe floatbe
272 fle floatle
274 if {$::tcl_platform(byteOrder) eq "bigEndian"} {
275 array set binary::convtype {host intbe fhost floatbe}
276 } else {
277 array set binary::convtype {host intle fhost floatle}