2 # aside from this initial boilerplate, this is actually -*- scheme -*- code
3 main
='(module-ref (resolve-module '\''(scripts read-scheme-source)) '\'main
')'
4 exec ${GUILE-guile} -l $0 -c "(apply $main (cdr (command-line)))" "$@"
6 ;;; read-scheme-source
--- Read a
file, recognizing scheme forms and comments
8 ;; Copyright
(C
) 2001 Free Software Foundation
, Inc.
10 ;; This program is free software
; you can redistribute it and
/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation
; either version
2, or
13 ;; (at your option
) any later version.
15 ;; This program is distributed
in the hope that it will be useful
,
16 ;; but WITHOUT ANY WARRANTY
; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License
for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this software
; see the
file COPYING. If not
, write to
22 ;; the Free Software Foundation
, Inc.
, 59 Temple Place
, Suite
330,
23 ;; Boston
, MA
02111-1307 USA
25 ;;; Author
: Thien-Thi Nguyen
29 ;; Usage
: read-scheme-source FILE1 FILE2 ...
31 ;; This program parses each FILE and writes to stdout sexps that describe the
32 ;; top-level structures of the
file: scheme forms
, single-line comments
, and
33 ;; hash-bang comments. You can further process these
(to associate comments
34 ;; w
/ scheme forms as a kind of documentation
, for example
).
36 ;; The output sexps have one of these forms
:
38 ;; (quote
(filename FILENAME
))
40 ;; (quote
(comment
:leading-semicolons N
43 ;; (quote
(whitespace
:text LINE
))
45 ;; (quote
(hash-bang-comment
:line LINUM
47 ;; :text-list
(LINE1 LINE2 ...
)))
49 ;; (quote
(following-form-properties
:line LINUM
52 ;; :signature SIGNATURE
53 ;; :std-int-doc DOCSTRING
))
57 ;; The first four are straightforward
(both FILENAME and LINE are
strings sans
58 ;; newline
, while LINUM and N are integers
). The last two always go together
,
59 ;; in that order. SEXP is scheme code processed only by
`read' and then
62 ;; The :type field may be omitted if the form is not recognized. Otherwise,
63 ;; TYPE may be one of: procedure, alias, define-module, variable.
65 ;; The :signature field may be omitted if the form is not a procedure.
66 ;; Otherwise, SIGNATURE is a list showing the procedure's signature.
68 ;; If the
type is
`procedure' and the form has a standard internal docstring
69 ;; (first body form a string), that is extracted in full -- including any
70 ;; embedded newlines -- and recorded by field :std-int-doc.
73 ;; Usage from a program: The output list of sexps can be retrieved by scheme
74 ;; programs w/o having to capture stdout, like so:
76 ;; (use-modules (scripts read-scheme-source))
77 ;; (define source-forms (read-scheme-source-silently "FILE1" "FILE2" ...))
79 ;; There are also two convenience procs exported for use by Scheme programs:
81 ;; (clump FORMS) --- filter FORMS combining contiguous comment forms that
82 ;; have the same number of leading semicolons.
84 ;; (quoted? SYM FORM) --- see if FORM looks like: "(quote (SYM ...))", parse
85 ;; the ":tags", and return alist of (TAG . VAL) elems.
87 ;; TODO: Add option "--clump-comments", maybe w/ different clumping styles.
88 ;; Make `annotate
!' extensible.
92 (define-module (scripts read-scheme-source)
93 :use-module (ice-9 rdelim)
94 :export (read-scheme-source
95 read-scheme-source-silently
99 ;; Try to figure out what FORM is and its various attributes.
100 ;; Call proc NOTE! with key (a symbol) and value.
102 (define (annotate! form note!)
103 (cond ((and (list? form)
105 (eq? 'define
(car form
))
107 (symbol?
(caadr form
)))
108 (note
! ':type 'procedure
)
109 (note
! ':signature (cadr form))
110 (and (< 3 (length form))
111 (string? (caddr form))
112 (note! ':std-int-doc
(caddr form
))))
115 (eq?
'define (car form))
116 (symbol? (cadr form))
118 (< 3 (length (caddr form)))
119 (eq? 'lambda
(car
(caddr form
)))
120 (string?
(caddr
(caddr form
))))
121 (note
! ':type 'procedure
)
122 (note
! ':signature (cons (cadr form) (cadr (caddr form))))
123 (note! ':std-int-doc
(caddr
(caddr form
))))
126 (eq?
'define (car form))
127 (symbol? (cadr form))
128 (symbol? (caddr form)))
129 (note! ':type 'alias))
131 (eq? 'define-module
(car form
)))
132 (note
! ':type 'define-module
))
133 ;; Add other types here.
134 (else (note
! ':type 'variable
))))
136 ;; Process FILE
, calling NB
! on parsed top-level elements.
137 ;; Recognized
: #!-!# and regular comments in addition to normal forms.
139 (define
(process
file nb
!)
140 (nb
! `'(filename ,file))
141 (let ((hash-bang-rx (make-regexp "^#!"))
142 (bang-hash-rx (make-regexp "^!#"))
143 (all-comment-rx (make-regexp "^[ \t]*(;+)"))
144 (all-whitespace-rx (make-regexp "^[ \t]*$"))
145 (p (open-input-file file)))
146 (let loop ((n (1+ (port-line p))) (line (read-line p)))
150 (cond ((regexp-exec hash-bang-rx line)
151 (let loop ((line (read-line p))
153 (if (or (eof-object? line)
154 (regexp-exec bang-hash-rx line))
155 (nb! `'(hash-bang-comment
157 :line-count ,(1+ (length text))
162 ((regexp-exec all-whitespace-rx line)
163 (nb! `'(whitespace
:text
,line
)))
164 ((regexp-exec all-comment-rx line
)
168 ,(let ((m1 (vector-ref m 1)))
169 (- (cdr m1) (car m1)))
172 (unread-string line p)
173 (let* ((form (read p))
174 (count (- (port-line p) n))
175 (props (let* ((props '())
178 (append props args)))))
179 (annotate! form prop+)
181 (or (= count 1) ; ugh
184 (set! count (1+ count))))
185 (nb! `'(following-form-properties
190 (loop (1+ (port-line p)) (read-line p)))))))
194 (define (read-scheme-source-silently . files)
195 "See commentary in module (scripts read-scheme-source)."
197 (for-each
(lambda
(file)
198 (process
file (lambda
(e
) (set! res
(cons e res
)))))
202 (define
(read-scheme-source . files
)
203 "See commentary in module (scripts read-scheme-source)."
204 (for-each
(lambda
(file)
205 (process
file (lambda
(e
) (write e
) (newline
))))
208 ;; Recognize
: (quote
(SYM
:TAG1 VAL1
:TAG2 VAL2 ...
))
209 ;; and
return alist
: ((TAG1 . VAL1
) (TAG2 . VAL2
) ...
)
210 ;; where the tags are symbols.
212 (define
(quoted? sym form
)
215 (eq?
'quote (car form))
216 (let ((inside (cadr form)))
218 (< 0 (length inside))
219 (eq? sym (car inside))
220 (let loop ((ls (cdr inside)) (alist '()))
223 (let ((first
(car
ls)))
227 (acons
(string-
>symbol
228 (substring
(symbol-
>string first
) 1))
232 ;; Filter FORMS
, combining contiguous comment forms that have the same number
233 ;; of leading semicolons. Do not include
in them whitespace lines.
234 ;; Whitespace lines outside of such comment groupings are ignored
, as are
235 ;; hash-bang comments. All other forms are passed through unchanged.
237 (define
(clump forms
)
238 (let loop
((forms forms
) (acc
'()) (pass-this-one-through? #f))
240 (reverse acc) ; retval
241 (let ((form (car forms)))
242 (cond (pass-this-one-through?
243 (loop (cdr forms) (cons form acc) #f))
244 ((quoted? 'following-form-properties form
)
245 (loop
(cdr forms
) (cons form acc
) #t))
246 ((quoted?
'whitespace form) ;;; ignore
247 (loop (cdr forms) acc #f))
248 ((quoted? 'hash-bang-comment form
) ;;; ignore
for now
249 (loop
(cdr forms
) acc
#f))
250 ((quoted?
'comment form)
252 (let cloop ((inner-forms (cdr forms))
253 (level (assq-ref alist 'leading-semicolons
))
254 (text
(list
(assq-ref alist
'text))))
257 (cons (cons level (reverse text))
260 (if (null? inner-forms)
262 (let ((inner-form (car inner-forms)))
263 (cond ((quoted? 'comment inner-form
)
264 => (lambda
(inner-alist
)
268 'leading-semicolons)))
269 (if (= new-level level)
270 (cloop (cdr inner-forms)
278 (else (loop
(cdr forms
) (cons form acc
) #f)))))))
280 ;;; script entry point
282 (define main read-scheme-source
)
284 ;;; read-scheme-source ends here