Remove single use function, revise comment, fix inlining failure
[sbcl.git] / src / code / debug-var-io.lisp
blobb39f8ff80bc84fb351a3ba6b2cdfe4d25201c3d9
1 ;;;; variable-length encoding and other i/o tricks for the debugger
3 ;;;; This software is part of the SBCL system. See the README file for
4 ;;;; more information.
5 ;;;;
6 ;;;; This software is derived from the CMU CL system, which was
7 ;;;; written at Carnegie Mellon University and released into the
8 ;;;; public domain. The software is in the public domain and is
9 ;;;; provided with absolutely no warranty. See the COPYING and CREDITS
10 ;;;; files for more information.
12 (in-package "SB!C")
14 ;;;; reading variable length integers
15 ;;;;
16 ;;;; The debug info representation makes extensive use of 32-bit
17 ;;;; integers encoded in an octet vector using between one and five
18 ;;;; octets. Each octet
19 ;;;;
20 ;;;; miiiiiii
21 ;;;;
22 ;;;; encodes 7 bits of the integer (i) while the final bit indicates
23 ;;;; whether more (m) octets follow. For example:
24 ;;;;
25 ;;;; #x88888888 => 10001000 10010001 10100010 11000100 00001000
27 ;;; Given an octet-vector SOURCE and an initial offset START, read a
28 ;;; variable length integer and return two values 1) the integer 2)
29 ;;; the new offset
30 (defun var-integer-decoding-error (source start offset)
31 (error "~@<Improperly terminated variable-length integer in ~S at ~
32 ~D (starting at ~D).~@:>"
33 source offset start))
35 (macrolet
36 ((define-read-var-integer (function-name macro-name source-type accessor)
37 `(progn
38 (declaim (ftype (function (,source-type index)
39 (values (unsigned-byte 32) index))
40 ,function-name))
41 (defun ,function-name (source start)
42 (loop
43 for offset :of-type index from start ; position in buffer
44 for k :of-type (integer 0 28) from 0 by 7 ; position in integer
45 for octet = (,accessor source offset)
46 for finalp = (not (logbitp 7 octet))
47 for accum :of-type (unsigned-byte 36) = (mask-field (byte 7 0) octet)
48 then (dpb octet (byte 7 k) accum)
49 when (and (= k 28) (not (zerop (ldb (byte 4 4) octet))))
50 do (var-integer-decoding-error source start offset)
51 when finalp return (values accum (1+ offset))))
53 (defmacro ,macro-name (vector index)
54 `(multiple-value-bind (value new-index)
55 (,',function-name ,vector ,index)
56 (setf ,index new-index)
57 value)))))
59 (define-read-var-integer read-var-integer read-var-integerf
60 (array (unsigned-byte 8) 1) aref)
62 #-sb-xc-host
63 (define-read-var-integer sap-read-var-integer sap-read-var-integerf
64 system-area-pointer sap-ref-8))
66 ;;; Take an adjustable vector VECTOR with a fill pointer and push the
67 ;;; variable length representation of VALUE on the end.
68 (declaim (ftype (sfunction ((unsigned-byte 32) (array (unsigned-byte 8) 1)) (integer 0 5))
69 write-var-integer))
70 (defun write-var-integer (value vector)
71 (loop
72 for v :of-type (unsigned-byte 32) = value then (ash v -7)
73 for v-next :of-type (unsigned-byte 32) = (ash v -7)
74 for i from 0
75 until (and (plusp i) (zerop v))
76 do (vector-push-extend (dpb (if (zerop v-next) 0 1) (byte 1 7)
77 (ldb (byte 7 0) v))
78 vector)
79 finally (return i)))
82 ;;;; packed strings
83 ;;;;
84 ;;;; A packed string is a variable length integer length followed by
85 ;;;; the character codes.
87 ;;; Read a packed string from VEC starting at INDEX, advancing INDEX.
88 (defmacro read-var-string (vec index)
89 (once-only ((len `(read-var-integerf ,vec ,index)))
90 (once-only ((res `(make-string ,len)))
91 `(progn
92 (loop for i from 0 below ,len
93 do (setf (aref ,res i)
94 (code-char (read-var-integerf ,vec ,index))))
95 ,res))))
97 ;;; Write STRING into VEC (adjustable, with fill-pointer) represented
98 ;;; as the length (in a var-length integer) followed by the codes of
99 ;;; the characters.
100 (defun write-var-string (string vec)
101 (declare (simple-string string))
102 (let ((len (length string)))
103 (write-var-integer len vec)
104 (dotimes (i len)
105 (write-var-integer (char-code (schar string i)) vec)))
106 (values))
108 ;;;; packed bit vectors
110 ;;; Read the specified number of BYTES out of VEC at INDEX and convert
111 ;;; them to a BIT-VECTOR. INDEX is incremented.
112 (defmacro read-packed-bit-vector (bytes vec index)
113 (once-only ((n-bytes bytes))
114 (once-only ((n-res `(make-array (* ,n-bytes 8) :element-type 'bit)))
115 `(progn
116 (%byte-blt ,vec ,index ,n-res 0 ,n-bytes)
117 (incf ,index ,n-bytes)
118 ,n-res))))
120 ;;; Code fixup locations are stored as varints even more densely than
121 ;;; would be an array of unsigned-byte. The backing storage is an integer
122 ;;; (typically a bignum, but a fixnum will do),
123 ;;; and each value represents the difference from the preceding value.
125 ;;; Somebody should try changing the x86 code to use this representation
126 ;;; and profile that to see if it makes performance worse.
127 ;;; x86-64 does not care about speed here, because GC is unaffected
128 ;;; except when saving a core.
130 (defun integer-from-octets (octets)
131 (declare (type (array (unsigned-byte 8) (*)) octets))
132 (let ((result 0) (shift 0))
133 (dovector (byte octets result)
134 (setf result (logior result (ash byte shift))
135 shift (+ shift 8)))))
137 ;;; XXX: Maybe rename this if we use it to pack debug-fun-ish things.
138 (defun pack-code-fixup-locs (list)
139 ;; Estimate the length
140 (let ((bytes (make-array (* 2 (length list)) :fill-pointer 0 :adjustable t
141 :element-type '(unsigned-byte 8)))
142 (prev 0))
143 (dolist (x list)
144 (aver (> x prev)) ; the incoming list must be sorted
145 (write-var-integer (- x prev) bytes)
146 (setq prev x))
147 ;; Pack into a single integer
148 (let ((result (integer-from-octets bytes)))
149 (aver (equal (unpack-code-fixup-locs result) list))
150 result)))
152 (defmacro do-packed-varints ((loc locs) &body body)
153 (with-unique-names (integer byte bytepos shift acc prev)
154 `(let ((,integer ,locs)
155 (,bytepos 0)
156 (,shift 0)
157 (,acc 0)
158 (,prev 0))
159 (loop
160 (let ((,byte (ldb (byte 8 ,bytepos) ,integer)))
161 (incf ,bytepos 8)
162 (setf ,acc (logior ,acc (ash (logand ,byte #x7f) ,shift)))
163 (cond ((logtest ,byte #x80) (incf ,shift 7))
164 ;; No offset can be zero, so this is the delimiter
165 ((zerop ,acc) (return))
167 (let ((,loc (+ ,prev ,acc))) ,@body (setq ,prev ,loc))
168 (setq ,acc 0 ,shift 0))))))))
170 (defun unpack-code-fixup-locs (packed-integer)
171 (collect ((locs))
172 (do-packed-varints (loc packed-integer) (locs loc))
173 (locs)))
175 (define-symbol-macro lz-symbol-1 210) ; arbitrary value that isn't frequent in the input
176 (define-symbol-macro lz-symbol-2 218) ; ditto
178 ;;; A somewhat bad (slow and not-very-squishy) compressor
179 ;;; that gets between 15% and 20% space savings in debug blocks.
180 ;;; Lengthy input may be compressible by as much as 3:1.
181 #-sb-xc-host
182 (declaim (ftype (sfunction ((simple-array (unsigned-byte 8) 1)) (simple-array (unsigned-byte 8) 1))
183 lz-compress))
184 (defun lz-compress (input)
185 (let ((output (make-array (length input)
186 :element-type '(unsigned-byte 8)
187 :fill-pointer 0 :adjustable t))
188 (tempbuf (make-array 8 :element-type '(unsigned-byte 8)
189 :fill-pointer 0)))
190 (flet ((compare (index1 index2 end &aux (start1 index1))
191 (loop
192 (when (or (eql index2 end)
193 (not (eql (aref input index1) (aref input index2))))
194 (return-from compare (- index1 start1)))
195 (incf index1)
196 (incf index2))))
197 (loop with pos of-type index = 0
198 while (< pos (length input))
200 (let ((match-start 0)
201 (match-len 2))
202 ;; limit the lookback amount to make the running time n^2 in input
203 ;; length instead of n^3.
204 (loop for start from (max 0 (- pos 4000)) below pos
206 (let ((this-len (compare start pos (length input))))
207 (when (> this-len match-len)
208 (setq match-start start match-len this-len))))
209 (let ((offset (- pos match-start)))
210 ;; Length = 3 is emitted as symbol-2 followed by a single byte
211 ;; for the offset. Longer lengths are written as symbol-1 and
212 ;; then two varint-encoded values. We first determine whether
213 ;; writing the back-reference is shorter than the source bytes.
214 (cond ((and (> match-len 3)
215 (progn (setf (fill-pointer tempbuf) 0)
216 (write-var-integer offset tempbuf)
217 (write-var-integer match-len tempbuf)
218 (< (1+ (fill-pointer tempbuf)) match-len)))
219 ;; marker symbol if followed by 0 would represent a literal
220 (aver (/= (aref tempbuf 0) 0))
221 (vector-push-extend lz-symbol-1 output)
222 (dovector (elt tempbuf) (vector-push-extend elt output))
223 (incf pos match-len))
224 ((and (= match-len 3) (< offset 256))
225 (vector-push-extend lz-symbol-2 output)
226 (vector-push-extend offset output)
227 (incf pos 3))
229 (let ((byte (aref input pos)))
230 (incf pos)
231 (vector-push-extend byte output)
232 (when (or (= byte lz-symbol-1) (= byte lz-symbol-2))
233 (vector-push-extend 0 output)))))))))
234 (let ((result
235 #+sb-xc-host
236 (coerce output '(simple-array (unsigned-byte 8) (*)))
237 #-sb-xc-host
238 (%shrink-vector (%array-data output) (fill-pointer output))))
239 (aver (equalp input (lz-decompress result)))
240 result)))
242 #-sb-xc-host
243 (declaim (ftype (sfunction ((simple-array (unsigned-byte 8) 1)) (simple-array (unsigned-byte 8) 1))
244 lz-decompress))
245 (defun lz-decompress (input)
246 (let* ((length (length input))
247 (output (make-array (* length 2)
248 :element-type '(unsigned-byte 8)
249 :fill-pointer 0 :adjustable t))
250 (inpos 0))
251 (flet ((copy (offset length)
252 (let ((index (- (fill-pointer output) offset)))
253 (dotimes (i length)
254 (vector-push-extend (aref output index) output)
255 (incf index)))))
256 (loop while (< inpos length)
258 (let ((byte (aref input inpos)))
259 (incf inpos)
260 (cond ((= byte lz-symbol-1) ; general case
261 (let ((byte (aref input inpos)))
262 (cond ((= byte 0) ; literal symbol
263 (incf inpos)
264 (vector-push-extend lz-symbol-1 output))
266 (binding* (((offset new-inpos)
267 (read-var-integer input inpos))
268 ((len new-new-inpos)
269 (read-var-integer input new-inpos)))
270 (setf inpos new-new-inpos)
271 (copy offset len))))))
272 ((= byte lz-symbol-2) ; special case
273 (let ((offset (aref input inpos)))
274 (incf inpos)
275 (if (= offset 0) ; literal symbol
276 (vector-push-extend lz-symbol-2 output)
277 (copy offset 3))))
279 (vector-push-extend byte output))))))
280 #+sb-xc-host
281 (coerce output '(simple-array (unsigned-byte 8) (*)))
282 #-sb-xc-host
283 (%shrink-vector (%array-data output) (fill-pointer output))))