* lisp/emacs-lisp/pcase.el (pcase-let*, pcase-let): Add debug and
[emacs.git] / lisp / emacs-lisp / pcase.el
blob0e01758b55999c587cb123e3987847ad6023380a
1 ;;; pcase.el --- ML-style pattern-matching macro for Elisp
3 ;; Copyright (C) 2010 Free Software Foundation, Inc.
5 ;; Author: Stefan Monnier <monnier@iro.umontreal.ca>
6 ;; Keywords:
8 ;; This file is part of GNU Emacs.
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
23 ;;; Commentary:
25 ;; ML-style pattern matching.
26 ;; The entry points are autoloaded.
28 ;; Todo:
30 ;; - provide ways to extend the set of primitives, with some kind of
31 ;; define-pcase-matcher. We could easily make it so that (guard BOOLEXP)
32 ;; could be defined this way, as a shorthand for (pred (lambda (_) BOOLEXP)).
33 ;; But better would be if we could define new ways to match by having the
34 ;; extension provide its own `pcase-split-<foo>' thingy.
35 ;; - ideally we'd want (pcase s ((re RE1) E1) ((re RE2) E2)) to be able to
36 ;; generate a lex-style DFA to decide whether to run E1 or E2.
38 ;;; Code:
40 (eval-when-compile (require 'cl))
;; Expanding a `pcase' form is cheap enough when byte-compiling, but
;; interpreted code that runs a `pcase' inside a loop would re-expand the
;; macro on every iteration, which is very costly.  To avoid that, previous
;; expansions are remembered in this table, keyed on the `equal' contents
;; of the macro call.
(defconst pcase-memoize (make-hash-table :test #'equal :weakness t))
;;;###autoload
(defmacro pcase (exp &rest cases)
  "Perform ML-style pattern matching on EXP.
CASES is a list of elements of the form (UPATTERN CODE...).

UPatterns can take the following forms:
  _                matches anything.
  SYMBOL           matches anything and binds it to SYMBOL.
  (or UPAT...)     matches if any of the patterns matches.
  (and UPAT...)    matches if all the patterns match.
  `QPAT            matches if the QPattern QPAT matches.
  (pred PRED)      matches if PRED applied to the object returns non-nil.
  (guard BOOLEXP)  matches if BOOLEXP evaluates to non-nil.

QPatterns can take the following forms:
  (QPAT1 . QPAT2)  matches if QPAT1 matches the car and QPAT2 the cdr.
  ,UPAT            matches if the UPattern UPAT matches.
  STRING           matches if the object is `equal' to STRING.
  ATOM             matches if the object is `eq' to ATOM.
QPatterns for vectors are not implemented yet.

PRED can take the form
  FUNCTION           in which case it gets called with one argument.
  (FUN ARG1 .. ARGN) in which case it gets called with N+1 arguments.
A PRED of the form FUNCTION is equivalent to one of the form (FUNCTION).
PRED patterns can refer to variables bound earlier in the pattern.
E.g. you can match pairs where the cdr is larger than the car with a pattern
like `(,a . ,(pred (< a))) or, with more checks:
  `(,(and a (pred numberp)) . ,(and (pred numberp) (pred (< a))))"
  ;; The Edebug spec is spelled out rather than borrowed from `case': that
  ;; spec is only defined once cl-specs has been loaded, and this file
  ;; requires `cl' at compile time only.
  (declare (indent 1) (debug (form &rest (sexp body)))) ;FIXME: edebug `guard' and vars.
  ;; Reuse an earlier expansion of an `equal' call when there is one;
  ;; otherwise expand now and remember the result.
  (let ((key (cons exp cases)))
    (or (gethash key pcase-memoize)
        (puthash key (pcase-expand exp cases) pcase-memoize))))
;;;###autoload
(defmacro pcase-let* (bindings &rest body)
  "Like `let*' but where you can use `pcase' patterns for bindings.
BINDINGS should be a list of bindings of the form (UPAT EXP).
BODY is one or more forms; several forms are evaluated in sequence as
with `progn'.  Each EXP is matched against its UPAT in order, so later
bindings can refer to variables bound by earlier ones.  An error is
signaled at run time if a pattern does not match."
  ;; The binding targets are patterns, not necessarily symbols, so the
  ;; Edebug spec of `let' (which insists on symbols) cannot be reused.
  (declare (indent 1) (debug ((&rest (sexp form)) body)))
  (let ((form (if (cdr body) `(progn ,@body) (car body))))
    (if (null bindings) form
      `(pcase ,(cadr (car bindings))
         (,(caar bindings) (pcase-let* ,(cdr bindings) ,form))
         ;; FIXME: In many cases `dontcare' would be preferable, so maybe we
         ;; should have `let' and `elet', like we have `case' and `ecase'.
         (t (error "Pattern match failure in `pcase-let'"))))))
;;;###autoload
(defmacro pcase-let (bindings &rest body)
  "Like `let' but where you can use `pcase' patterns for bindings.
BINDINGS should be a list of bindings of the form (UPAT EXP).
BODY is one or more forms; several forms are evaluated in sequence as
with `progn'.  When there are several bindings, all the EXPs are
evaluated before any pattern is matched, so an EXP cannot refer to
variables bound by the other patterns."
  ;; The binding targets are patterns, not necessarily symbols, so the
  ;; Edebug spec of `let' (which insists on symbols) cannot be reused.
  (declare (indent 1) (debug ((&rest (sexp form)) body)))
  ;; `pcase-let*' is always handed exactly one body form.
  (let ((form (if (cdr body) `(progn ,@body) (car body))))
    (if (null (cdr bindings))
        ;; With at most one binding, `let' and `let*' coincide.
        `(pcase-let* ,bindings ,form)
      ;; Evaluate every EXP into a fresh uninterned temporary first, then
      ;; destructure the temporaries sequentially.
      ;; Each element of TEMPS is (TEMP UPAT EXP).
      (let ((temps (mapcar (lambda (b) (cons (make-symbol "x") b)) bindings)))
        `(let ,(mapcar (lambda (tb) (list (nth 0 tb) (nth 2 tb))) temps)
           (pcase-let*
               ,(mapcar (lambda (tb) (list (nth 1 tb) (nth 0 tb))) temps)
             ,form))))))
(defun pcase-expand (exp cases)
  "Return the expansion of (pcase EXP . CASES).
The result is a `let' form that binds EXP to a temporary (unless EXP is
already a symbol), binds any branch bodies that had to be turned into
shared functions, and then runs the decision tree built by `pcase-u'."
  (let* ((defs (unless (symbolp exp)
                 ;; Evaluate EXP only once: bind it to a fresh symbol and
                 ;; match against that symbol from here on.
                 (let ((tmp (make-symbol "x")))
                   (prog1 (list (list tmp exp))
                     (setq exp tmp)))))
         ;; List of (CODE VARS RES) for every leaf generated so far.
         (seen '())
         (codegen
          (lambda (code vars)
            (let ((entry (assq code seen)))
              (cond
               ((null entry)
                ;; First use of this leaf: emit it inline and remember it.
                (let ((form (pcase-codegen code vars)))
                  (setq seen (cons (list code vars form) seen))
                  form))
               (t
                ;; The matcher is a tree, so a leaf (the code to run once
                ;; a pattern has matched) may be reached along many paths.
                ;; To keep the output from blowing up, a leaf that is used
                ;; a second time is moved into a function bound in DEFS,
                ;; and every use becomes a `funcall' of that function.
                (let ((prevvars (nth 1 entry))
                      (target (nth 2 entry)))
                  (unless (symbolp target)
                    ;; First repeat: TARGET is still the inline form.
                    ;; Define the function and rewrite the earlier inline
                    ;; form in place into a call to it.
                    (let ((fname (make-symbol
                                  (format "pcase-%d" (length defs)))))
                      (setq defs
                            (cons (list fname
                                        (append (list 'lambda
                                                      (mapcar #'car prevvars))
                                                code))
                                  defs))
                      (setcar target 'funcall)
                      (setcdr target (cons fname (mapcar #'cdr prevvars)))
                      (setcar (nthcdr 2 entry) fname)
                      (setq target fname)))
                  ;; Build the argument list in the order of the first
                  ;; use's variables, ticking each one off a copy of VARS.
                  (let* ((pending (copy-sequence vars))
                         (actuals
                          (mapcar (lambda (formal)
                                    (let ((cell (assq (car formal) pending)))
                                      (setq pending (delq cell pending))
                                      (cdr cell)))
                                  prevvars)))
                    ;; Anything left over was not bound on the first path.
                    (if pending
                        (error "The vars %s are only bound in some paths"
                               (mapcar #'car pending)))
                    (cons 'funcall (cons target actuals)))))))))
         (main
          (pcase-u
           (mapcar (lambda (case)
                     (let ((upat (car case))
                           (body (cdr case)))
                       (list (cons 'match (cons exp upat))
                             (apply-partially
                              ;; Small leaves are cheap to duplicate, so
                              ;; they skip the sharing machinery.
                              (if (pcase-small-branch-p body)
                                  #'pcase-codegen
                                codegen)
                              body))))
                   cases))))
    (list 'let defs main)))
(defun pcase-codegen (code vars)
  "Return a `let' form running the forms CODE with VARS bound.
VARS is an alist of (NAME . VALUE-FORM); each pair becomes a
binding (NAME VALUE-FORM), in the same order."
  (let ((bindings '()))
    (dolist (pair vars)
      (setq bindings (cons (list (car pair) (cdr pair)) bindings)))
    (cons 'let (cons (nreverse bindings) code))))
(defun pcase-small-branch-p (code)
  "Return non-nil if the branch body CODE is small.
CODE is a list of forms.  It counts as small when it holds exactly one
form and that form is either an atom or a list none of whose elements
is itself a cons."
  (and (= 1 (length code))
       (let ((form (car code)))
         (if (consp form)
             ;; `consp' returns exactly t, so look for a t in the results.
             (not (memq t (mapcar #'consp form)))
           t))))
;; Prefer one flat `cond' over a chain of nested `if's, so that the
;; generated tree stays shallow.
(defun pcase-if (test then else)
  "Return code that runs THEN if TEST is non-nil and ELSE otherwise.
If ELSE is `:pcase-dontcare', THEN is returned unconditionally.  If ELSE
is itself an `if' or a `cond', the new test is merged into it rather
than nested around it."
  (let ((head (car-safe else)))
    (cond
     ((eq else :pcase-dontcare) then)
     ((and (eq head 'if) (equal test (nth 1 else)))
      ;; ELSE repeats the very same test: drop the redundant one.
      ;; FIXME: ideally this would never happen because the pcase-split-*
      ;; functions should have removed such tests, but pcase-split-member
      ;; is imprecise, so in practice it does occur now and then.
      (append (list 'if test then) (nthcdr 3 else)))
     ((eq head 'if)
      (list 'cond
            (list test then)
            (list (nth 1 else) (nth 2 else))
            (cons t (nthcdr 3 else))))
     ((eq head 'cond)
      ;; As above, a clause of ELSE that repeats TEST is removed.
      (append (list 'cond (list test then))
              (remove (assoc test else) (cdr else))))
     (t (list 'if test then else)))))
(defun pcase-upat (qpattern)
  "Return the UPattern equivalent to the QPattern QPATTERN.
An unquoted pattern ,UPAT yields UPAT itself; anything else is wrapped
in a backquote."
  (if (eq '\, (car-safe qpattern))
      (nth 1 qpattern)
    (list '\` qpattern)))
206 ;; Note about MATCH:
207 ;; When we have patterns like `(PAT1 . PAT2), after performing the `consp'
208 ;; check, we want to turn all the similar patterns into ones of the form
209 ;; (and (match car PAT1) (match cdr PAT2)), so you naturally need conjunction.
210 ;; Earlier code hence used branches of the form (MATCHES . CODE) where
211 ;; MATCHES was a list (implicitly a conjunction) of (SYM . PAT).
212 ;; But if we have a pattern of the form (or `(PAT1 . PAT2) PAT3), there is
213 ;; no easy way to eliminate the `consp' check in such a representation.
214 ;; So we replaced the MATCHES by the MATCH below which can be made up
215 ;; of conjunctions and disjunctions, so if we know `foo' is a cons, we can
216 ;; turn (match foo . (or `(PAT1 . PAT2) PAT3)) into
217 ;; (or (and (match car . `PAT1) (match cdr . `PAT2)) (match foo . PAT3)).
218 ;; The downside is that we now have `or' and `and' both in MATCH and
219 ;; in PAT, so there are different equivalent representations and we
220 ;; need to handle them all. We do not try to systematically
221 ;; canonicalize them to one form over another, but we do occasionally
222 ;; turn one into the other.
(defun pcase-u (branches)
  "Expand matcher for rules BRANCHES.
Each BRANCH has the form (MATCH CODE . VARS) where
CODE is the code generator for that branch.
VARS is the set of vars already bound by earlier matches.
MATCH is the pattern that needs to be matched, of the form:
  (match VAR . UPAT)
  (and MATCH ...)
  (or MATCH ...)
Null elements of BRANCHES are dropped (destructively).  Return nil when
no branch is left."
  (let ((live (delq nil branches)))
    (if live
        (let ((head (car live)))
          ;; Try the first branch; the others are what it falls back on.
          (pcase-u1 (list (nth 0 head))
                    (nth 1 head)
                    (nthcdr 2 head)
                    (cdr live))))))
(defun pcase-and (match matches)
  "Return a MATCH that is the conjunction of MATCH and the list MATCHES.
When MATCHES is empty, MATCH is returned unchanged."
  (if (null matches)
      match
    (cons 'and (cons match matches))))
(defun pcase-split-match (sym splitter match)
  "Split MATCH according to the outcome of a test performed on SYM.
SPLITTER is called with the UPattern of each (match SYM . UPAT) found in
MATCH.  It returns nil or a cons (THEN . ELSE), where a nil THEN or ELSE
means that the original match must be kept on that side.
MATCH is of the form (match VAR . UPAT), (and MATCH ...) or (or MATCH ...).

Return a cons (THEN-MATCH . ELSE-MATCH): what is left of MATCH when the
test succeeded and when it failed.  Either part can be `:pcase-succeed'
or `:pcase-fail'.  Signal an error if MATCH has an unknown head."
  (case (car match)
    ((match)
     (if (not (eq sym (cadr match)))
         ;; A match on another variable is unaffected by the test.
         (cons match match)
       (let ((pat (cddr match)))
         (cond
          ;; Hoist `or' and `and' patterns to `or' and `and' matches.
          ((memq (car-safe pat) '(or and))
           (pcase-split-match sym splitter
                              (cons (car pat)
                                    (mapcar (lambda (alt)
                                              `(match ,sym . ,alt))
                                            (cdr pat)))))
          (t (let ((res (funcall splitter pat)))
               (cons (or (car res) match) (or (cdr res) match))))))))
    ((or and)
     (let ((then-alts '())
           (else-alts '())
           ;; The neutral element can be dropped from the combination;
           ;; the zero element decides the whole combination on its own.
           (neutral-elem (if (eq 'or (car match)) :pcase-fail :pcase-succeed))
           (zero-elem (if (eq 'or (car match)) :pcase-succeed :pcase-fail)))
       (dolist (alt (cdr match))
         (let ((split (pcase-split-match sym splitter alt)))
           (unless (eq (car split) neutral-elem)
             (push (car split) then-alts))
           (unless (eq (cdr split) neutral-elem)
             (push (cdr split) else-alts))))
       (cons (cond ((memq zero-elem then-alts) zero-elem)
                   ((null then-alts) neutral-elem)
                   ((null (cdr then-alts)) (car then-alts))
                   (t (cons (car match) (nreverse then-alts))))
             (cond ((memq zero-elem else-alts) zero-elem)
                   ((null else-alts) neutral-elem)
                   ((null (cdr else-alts)) (car else-alts))
                   (t (cons (car match) (nreverse else-alts)))))))
    (t (error "Unknown MATCH %s" match))))
(defun pcase-split-rest (sym splitter rest)
  "Split the branches REST according to a test performed on SYM.
Each branch has the form (MATCH . CODE&VARS); its MATCH is split with
`pcase-split-match' using SPLITTER.  Return a cons (THEN-REST . ELSE-REST)
of the branches still worth trying when the test succeeded and when it
failed, in their original order.  A branch whose split match is
`:pcase-fail' on one side is left out of that side."
  (let ((yes '())
        (no '()))
    (dolist (branch rest)
      (let* ((tail (cdr branch))
             (halves (pcase-split-match sym splitter (car branch)))
             (then-match (car halves))
             (else-match (cdr halves)))
        (or (eq then-match :pcase-fail)
            (setq yes (cons (cons then-match tail) yes)))
        (or (eq else-match :pcase-fail)
            (setq no (cons (cons else-match tail) no)))))
    (cons (nreverse yes) (nreverse no))))
(defun pcase-split-consp (syma symd pat)
  "Split the UPattern PAT with respect to a `consp' test.
SYMA and SYMD are the symbols holding the car and the cdr of the tested
object.  Return a cons (THEN . ELSE) as expected by `pcase-split-match',
or nil if PAT is not a backquoted pattern."
  (when (eq (car-safe pat) '\`)
    (let ((qpat (cadr pat)))
      (if (consp qpat)
          ;; A QPattern for a cons can only go to the `then' side, where
          ;; it turns into matches on the car and on the cdr.
          (cons (list 'and
                      (cons 'match (cons syma (pcase-upat (car qpat))))
                      (cons 'match (cons symd (pcase-upat (cdr qpat)))))
                :pcase-fail)
        ;; A QPattern for something other than a cons can only go to the
        ;; `else' side.
        (cons :pcase-fail nil)))))
(defun pcase-split-equal (elem pat)
  "Split the UPattern PAT with respect to an equality test against ELEM.
Return a cons (THEN . ELSE) as expected by `pcase-split-match', or nil
if PAT is not a backquoted pattern, in which case nothing is known
about it."
  (cond
   ;; The same match will give the same result.
   ((and (eq (car-safe pat) '\`) (equal (cadr pat) elem))
    (cons :pcase-succeed :pcase-fail))
   ;; A different match will fail if this one succeeds.
   ((and (eq (car-safe pat) '\`)
         ;; (or (integerp (cadr pat)) (symbolp (cadr pat))
         ;;     (consp (cadr pat)))
         )
    (cons :pcase-fail nil))))
(defun pcase-split-member (elems pat)
  "Split the UPattern PAT with respect to a membership test in ELEMS.
Return a cons (THEN . ELSE) as expected by `pcase-split-match', or nil
when nothing can be concluded about PAT."
  ;; Based on pcase-split-equal.
  (cond
   ;; The same match (or a match of membership in a superset) will
   ;; give the same result, but we don't know how to check it.
   ;; (???
   ;;  (cons :pcase-succeed nil))
   ;; A match for one of the elements may succeed or fail.
   ((and (eq (car-safe pat) '\`) (member (cadr pat) elems))
    nil)
   ;; A different match will fail if this one succeeds.
   ((and (eq (car-safe pat) '\`)
         ;; (or (integerp (cadr pat)) (symbolp (cadr pat))
         ;;     (consp (cadr pat)))
         )
    (cons :pcase-fail nil))))
(defun pcase-split-pred (upat pat)
  "Split the UPattern PAT with respect to the predicate pattern UPAT.
Return (:pcase-succeed . :pcase-fail) when PAT is `equal' to UPAT, and
nil otherwise."
  ;; FIXME: For predicates like (pred (> a)), two such predicates may
  ;; actually refer to different variables `a'.
  (and (equal upat pat)
       (cons :pcase-succeed :pcase-fail)))
(defun pcase-fgrep (vars sexp)
  "Check which of the symbols VARS appear in SEXP.
SEXP is walked as a tree of conses, including the final cdr of each
list.  Return the list of elements of VARS found, without duplicates."
  (let ((found '())
        (tail sexp))
    ;; Collect from each element of the (possibly dotted) list.
    (while (consp tail)
      (dolist (sym (pcase-fgrep vars (car tail)))
        (or (memq sym found)
            (setq found (cons sym found))))
      (setq tail (cdr tail)))
    ;; TAIL is now an atom: SEXP itself, or the last cdr of the list.
    (when (and (memq tail vars) (not (memq tail found)))
      (setq found (cons tail found)))
    found))
346 ;; It's very tempting to use `pcase' below, tho obviously, it'd create
347 ;; bootstrapping problems.
(defun pcase-u1 (matches code vars rest)
  "Return code that runs CODE (with VARS) if MATCHES match.
Otherwise defer to REST, which is a list of branches of the form
\(ELSE-MATCH ELSE-CODE . ELSE-VARS).

MATCHES is a list of MATCHes that must all succeed; each element is
`:pcase-succeed', `:pcase-fail', or a form headed by `match', `and'
or `or'.
CODE is a function that is called with the final VARS and returns the
code of the branch.
VARS is an alist of (NAME . SYMBOL) recording the pattern variables
bound so far and the symbol that holds the value of each."
  ;; Depending on the order in which we choose to check each of the MATCHES,
  ;; the resulting tree may be smaller or bigger.  So in general, we'd want
  ;; to be careful to chose the "optimal" order.  But predicate
  ;; patterns make this harder because they create dependencies
  ;; between matches.  So we don't bother trying to reorder anything.
  (cond
   ;; Nothing left to check: emit the code of the branch.
   ((null matches) (funcall code vars))
   ((eq :pcase-fail (car matches)) (pcase-u rest))
   ((eq :pcase-succeed (car matches))
    (pcase-u1 (cdr matches) code vars rest))
   ;; An `and' MATCH is flattened into the list of pending matches.
   ((eq 'and (caar matches))
    (pcase-u1 (append (cdar matches) (cdr matches)) code vars rest))
   ((eq 'or (caar matches))
    (let* ((alts (cdar matches))
           (var (if (eq (caar alts) 'match) (cadr (car alts))))
           (simples '()) (others '()))
      ;; Sort the alternatives into SIMPLES (matches of VAR against a
      ;; quoted integer, symbol or string) and OTHERS.
      (when var
        (dolist (alt alts)
          (if (and (eq (car alt) 'match) (eq var (cadr alt))
                   (let ((upat (cddr alt)))
                     (and (eq (car-safe upat) '\`)
                          (or (integerp (cadr upat)) (symbolp (cadr upat))
                              (stringp (cadr upat))))))
              (push (cddr alt) simples)
            (push alt others))))
      (cond
       ((null alts) (error "Please avoid it") (pcase-u rest))
       ((> (length simples) 1)
        ;; De-hoist the `or' MATCH into an `or' pattern that will be
        ;; turned into a `memq' below.
        (pcase-u1 (cons `(match ,var or . ,(nreverse simples)) (cdr matches))
                  code vars
                  (if (null others) rest
                    (cons (list*
                           (pcase-and (if (cdr others)
                                          (cons 'or (nreverse others))
                                        (car others))
                                      (cdr matches))
                           code vars)
                          rest))))
       (t
        ;; Try the first alternative; the remaining ones become an extra
        ;; branch to fall back on, placed ahead of REST.
        (pcase-u1 (cons (pop alts) (cdr matches)) code vars
                  (if (null alts) (progn (error "Please avoid it") rest)
                    (cons (list*
                           (pcase-and (if (cdr alts)
                                          (cons 'or alts) (car alts))
                                      (cdr matches))
                           code vars)
                          rest)))))))
   ((eq 'match (caar matches))
    (destructuring-bind (op sym &rest upat) (pop matches)
      (cond
       ((memq upat '(t _)) (pcase-u1 matches code vars rest))
       ((eq upat 'dontcare) :pcase-dontcare)
       ((functionp upat) (error "Feature removed, use (pred %s)" upat))
       ((memq (car-safe upat) '(guard pred))
        (destructuring-bind (then-rest &rest else-rest)
            (pcase-split-rest
             sym (apply-partially 'pcase-split-pred upat) rest)
          (pcase-if (if (and (eq (car upat) 'pred) (symbolp (cadr upat)))
                        `(,(cadr upat) ,sym)
                      (let* ((exp (cadr upat))
                             ;; `vs' is an upper bound on the vars we need.
                             (vs (pcase-fgrep (mapcar #'car vars) exp))
                             (call (cond
                                    ((eq 'guard (car upat)) exp)
                                    ((functionp exp) `(,exp ,sym))
                                    (t `(,@exp ,sym)))))
                        (if (null vs)
                            call
                          ;; Let's not replace `vars' in `exp' since it's
                          ;; too difficult to do it right, instead just
                          ;; let-bind `vars' around `exp'.
                          `(let ,(mapcar (lambda (var)
                                           (list var (cdr (assq var vars))))
                                         vs)
                             ;; FIXME: `vars' can capture `sym'.  E.g.
                             ;; (pcase x ((and `(,x . ,y) (pred (fun x)))))
                             ,call))))
                    (pcase-u1 matches code vars then-rest)
                    (pcase-u else-rest))))
       ;; A plain symbol matches anything and is recorded in VARS.
       ((symbolp upat)
        (pcase-u1 matches code (cons (cons upat sym) vars) rest))
       ((eq (car-safe upat) '\`)
        (pcase-q1 sym (cadr upat) matches code vars rest))
       ((eq (car-safe upat) 'or)
        ;; ALL stays non-nil only if there are at least two alternatives
        ;; and each is a quoted symbol, integer or string.  MEMQ-FINE
        ;; becomes nil as soon as one of them is neither a symbol nor an
        ;; integer, in which case `member' is used instead of `memq'.
        (let ((all (> (length (cdr upat)) 1))
              (memq-fine t))
          (when all
            (dolist (alt (cdr upat))
              (unless (and (eq (car-safe alt) '\`)
                           (or (symbolp (cadr alt)) (integerp (cadr alt))
                               (setq memq-fine nil)
                               (stringp (cadr alt))))
                (setq all nil))))
          (if all
              ;; Use memq for (or `a `b `c `d) rather than a big tree.
              (let ((elems (mapcar 'cadr (cdr upat))))
                (destructuring-bind (then-rest &rest else-rest)
                    (pcase-split-rest
                     sym (apply-partially 'pcase-split-member elems) rest)
                  (pcase-if `(,(if memq-fine #'memq #'member) ,sym ',elems)
                            (pcase-u1 matches code vars then-rest)
                            (pcase-u else-rest))))
            ;; Otherwise try the first alternative, and turn each of the
            ;; others into an extra branch placed ahead of REST.
            (pcase-u1 (cons `(match ,sym ,@(cadr upat)) matches) code vars
                      (append (mapcar (lambda (upat)
                                        `((and (match ,sym . ,upat) ,@matches)
                                          ,code ,@vars))
                                      (cddr upat))
                              rest)))))
       ((eq (car-safe upat) 'and)
        (pcase-u1 (append (mapcar (lambda (upat) `(match ,sym ,@upat)) (cdr upat))
                          matches)
                  code vars rest))
       ((eq (car-safe upat) 'not)
        ;; FIXME: The implementation below is naive and results in
        ;; inefficient code.
        ;; To make it work right, we would need to turn pcase-u1's
        ;; `code' and `vars' into a single argument of the same form as
        ;; `rest'.  We would also need to split this new `then-rest' argument
        ;; for every test (currently we don't bother to do it since
        ;; it's only useful for odd patterns like (and `(PAT1 . PAT2)
        ;; `(PAT3 . PAT4)) which the programmer can easily rewrite
        ;; to the more efficient `(,(and PAT1 PAT3) . ,(and PAT2 PAT4))).
        (pcase-u1 `((match ,sym . ,(cadr upat)))
                  (lexical-let ((rest rest))
                    ;; FIXME: This codegen is not careful to share its
                    ;; code if used several times: code blow up is likely.
                    (lambda (vars)
                      ;; `vars' will likely contain bindings which are
                      ;; not always available in other paths to
                      ;; `rest', so there's no point trying to pass
                      ;; them down.
                      (pcase-u rest)))
                  vars
                  (list `((and . ,matches) ,code . ,vars))))
       (t (error "Unknown upattern `%s'" upat)))))
   (t (error "Incorrect MATCH %s" (car matches)))))
(defun pcase-q1 (sym qpat matches code vars rest)
  "Return code that runs CODE if SYM matches QPAT and if MATCHES match.
If not, defer to REST, which is a list of branches of the form
\(OTHER-MATCH OTHER-CODE . OTHER-VARS).

QPAT is a QPattern: a cons, an integer, a symbol or a string.
An error is signaled for an unquoted pattern, a float, a vector, or any
other kind of object."
  (cond
   ((eq (car-safe qpat) '\,) (error "Can't use `,UPATTERN"))
   ((floatp qpat) (error "Floating point patterns not supported"))
   ((vectorp qpat)
    ;; FIXME.
    (error "Vector QPatterns not implemented yet"))
   ((consp qpat)
    ;; Test `consp' once, bind the car and the cdr to fresh symbols, and
    ;; go on matching the two sub-patterns against those symbols.
    (let ((syma (make-symbol "xcar"))
          (symd (make-symbol "xcdr")))
      (destructuring-bind (then-rest &rest else-rest)
          (pcase-split-rest sym (apply-partially 'pcase-split-consp syma symd)
                            rest)
        (pcase-if `(consp ,sym)
                  `(let ((,syma (car ,sym))
                         (,symd (cdr ,sym)))
                     ,(pcase-u1 `((match ,syma . ,(pcase-upat (car qpat)))
                                  (match ,symd . ,(pcase-upat (cdr qpat)))
                                  ,@matches)
                                code vars then-rest))
                  (pcase-u else-rest)))))
   ((or (integerp qpat) (symbolp qpat) (stringp qpat))
    (destructuring-bind (then-rest &rest else-rest)
        (pcase-split-rest sym (apply-partially 'pcase-split-equal qpat) rest)
      ;; Strings are compared with `equal', integers and symbols with `eq'.
      (pcase-if `(,(if (stringp qpat) #'equal #'eq) ,sym ',qpat)
                (pcase-u1 matches code vars then-rest)
                (pcase-u else-rest))))
   (t (error "Unknown QPattern %s" qpat))))
524 (provide 'pcase)
525 ;;; pcase.el ends here