muse-wiki: Try to fix an issue with large projects.
[muse-el.git] / experimental / muse-split.el
blob592b904ae52aa821ec1e887b8618ebec90b0031a
1 ;;; muse-split.el --- split published Muse files
3 ;; Copyright (C) 2006 Free Software Foundation, Inc.
5 ;; Author: Phillip Lord <phillip.lord@newcastle.ac.uk>
7 ;; This file is part of Emacs Muse. It is not part of GNU Emacs.
9 ;; Emacs Muse is free software; you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published
11 ;; by the Free Software Foundation; either version 2, or (at your
12 ;; option) any later version.
14 ;; Emacs Muse is distributed in the hope that it will be useful, but
15 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ;; General Public License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with Emacs Muse; see the file COPYING. If not, write to the
21 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22 ;; Boston, MA 02110-1301, USA.
24 ;;; Commentary:
26 ;;; Status:
28 ;; This works now, except that anchors will get broken, as they may
29 ;; well point to the wrong thing.
31 ;; Anchors are mostly working, some crashes in caching code. Have
32 ;; realised that could just circumvent the anchors problem by always
33 ;; pointing toward the full length entry which all of my split
34 ;; functions generate now. Given the complexity that this has
35 ;; introduced taht might not have been a bad idea.
37 ;; These functions directly over-write the original versions in
38 ;; muse-publish.
40 (require 'muse-publish)
41 (require 'assoc)
42 (eval-when-compile
43 (require 'cl))
45 ;; this code duplicates that in muse-publish-markup-regexps and should
46 ;; be factored out. I use this style to pull directives from the front
47 ;; of file.
48 (defvar muse-publish-presplit-markup-regexps
50 ;; Handle any leading #directives
51 (1200 "\\`#\\([a-zA-Z-]+\\)\\s-+\\(.+\\)\n+" 0 directive)
52 ;; define anchor points
53 (1500 "^\\(\\W*\\)#\\(\\S-+\\)\\s-*" 0 anchor)))
55 (defvar muse-publish-presplit-functions
56 '((directive . muse-publish-presplit-directive)
57 (anchor . muse-publish-presplit-anchor)))
59 ;; oh dear, this function used to be so simple and now has got so
60 ;; nasty. I'm sure I can amalgamate some of the let bindings and
61 ;; lambda function.
62 (defun muse-publish-file (file style &optional output-dir force)
63 "Publish the given file in list FILES.
64 If the argument FORCE is nil, each file is only published if it is
65 newer than the published version. If the argument FORCE is non-nil,
66 the file is published no matter what."
67 (interactive (cons (read-file-name "Publish file: ")
68 (muse-publish-get-info)))
69 (setq style (muse-style style))
70 (let* ((output-path (muse-publish-output-file file output-dir style))
71 (output-suffix (muse-style-element :osuffix style))
72 (muse-publishing-current-file file)
73 (muse-publishing-style-in-use style)
74 (muse-publish-split-file-split-values nil)
75 (muse-publish-presplit-directive-store "")
76 (muse-publish-presplit-anchor-location nil)
77 (muse-publishing-targets-alist
78 (muse-publish-split-file file))
79 (target-list
80 (mapcar
81 (lambda(elem)
82 (if output-suffix
83 (concat (file-name-sans-extension
84 (cdr (car elem)))
85 output-suffix)
86 output-path))
87 muse-publishing-targets-alist)))
89 (when (or force
90 ;; update if any of the files are out of date.
91 (let ((outofdate nil))
92 (mapc
93 (lambda(elem)
94 (if (file-newer-than-file-p file
95 (car elem))
96 (setq outofdate t)))
97 muse-publishing-targets-alist)
98 outofdate))
100 (if (and muse-publish-report-threshhold
101 (> (nth 7 (file-attributes file))
102 muse-publish-report-threshhold))
103 (message "Publishing %s ..." file)
104 ;; need to grab the directives.
105 (muse-publish-presplit-publish file)
106 ;; start a temp buffer for main data
107 (muse-with-temp-buffer
108 (insert-file-contents file)
109 (let ((mainbuffer (current-buffer))
110 (subcontents))
111 (mapc
112 (lambda(elem)
113 (muse-with-temp-buffer
114 ;; not handling the directives yet.
115 (save-excursion
116 (set-buffer mainbuffer)
117 (setq subcontents
118 (buffer-substring-no-properties
119 (cadr elem) (caddr elem))))
120 ;; insert the directives afresh.
121 (insert muse-publish-presplit-directive-store)
122 (insert subcontents)
123 (muse-publish-markup-buffer (muse-page-name file) style)
124 (let* ((backup-inhibited t))
125 (write-file (muse-publish-output-file (car elem)
126 output-dir style)))
127 (muse-style-run-hooks :final style file (car elem))))
128 muse-publishing-targets-alist)))
129 t))))
131 (defun muse-publish-presplit-publish(file)
132 (muse-with-temp-buffer
133 (insert-file-contents file)
134 (let ((muse-publish-markup-regexps muse-publish-presplit-markup-regexps)
135 (muse-publish-markup-functions muse-publish-presplit-functions)
136 (muse-publishing-styles)
137 (muse-publish-presplit-splitting-file file))
138 ;; great an empty style. The name is just wierd, so that
139 ;; it won't preexist (which makes muse crash). The let
140 ;; binding should mean that it disappears.
141 (muse-define-style "ThePurposeIsNotToDescribeTheWorldButToChangeIt")
142 (muse-publish-markup-buffer
143 (muse-page-name "temp")
144 "ThePurposeIsNotToDescribeTheWorldButToChangeIt"))))
146 (defun muse-publish-prepare-url (target &rest ignored)
147 "Transform anchors and get published name, if TARGET is a page."
148 (save-match-data
149 (unless (or (string-match muse-url-regexp target)
150 (string-match muse-image-regexp target)
151 (string-match muse-file-regexp target))
152 (setq target (if (string-match "#" target)
153 ;; is this a simple anchor, we need to check
154 ;; where it will be published.
155 (if (eq (aref target 0) ?\#)
156 (concat
157 (muse-publish-link-name
158 (muse-publish-split-file-for-anchor
159 muse-publishing-current-file
160 (substring target 1)))
161 target)
162 ;; it's not anchor simple anchor, so we need to
163 ;; put in the extension
164 (let
165 ((file (substring target 0 (match-beginning 0)))
166 (anchor (substring target (match-end 0))))
167 (concat (muse-publish-link-name
168 (muse-publish-split-file-for-anchor
169 (concat (file-name-directory
170 muse-publishing-current-file)
171 file "." muse-file-extension)
172 anchor))
173 "#" anchor)))
174 ;; it's not an anchor at all.
175 (muse-publish-link-name target))))
176 target))
178 ;; these are support functions
180 ;; we currently have to store a lot of state to get this to work,
181 ;; which is rather dissatisfying. All of it is let bound from
182 ;; muse-publish-file. Wey hey for dynamic scoping.
183 (defvar muse-publish-presplit-directive-store nil
184 "Stores directives from main file during splitting")
186 (defvar muse-publish-presplit-anchor-location nil
187 "Stores anchors during publishing.")
189 (defvar muse-publish-split-file-split-values nil
190 "Cache the values of split locations in files, during publish")
192 (defvar muse-publishing-targets-alist nil
193 "Stores the targets to be published to.
195 Changing this will cause bad things to happen. ")
197 (defvar muse-publishing-style-in-use nil
198 "Stores the style currently being published")
200 (defvar muse-publish-presplit-splitting-file nil
201 "The file that we are current publishing for presplit")
204 (defun muse-publish-no-split-function (file)
205 (muse-with-temp-buffer
206 (insert-file-contents file)
207 (list `(,(file-name-sans-extension file) . (1 ,(point-max))))))
209 (defun muse-publish-split-file (file)
210 "Calculate where to split the FILE.
212 FILE is the file to be split
214 This should return an alist of form (position . output-file)
215 where position is the last position that should appear in output-file"
216 (let* ((split-function
217 (muse-get-keyword
218 :split muse-publishing-style-in-use t))
219 (split-alist
220 (if (not split-function)
221 (muse-publish-no-split-function file)
222 (funcall split-function file))))
223 (aput 'muse-publish-split-file-split-values
224 file split-alist)
225 split-alist))
227 (defun muse-publish-presplit-directive (&optional name value)
228 (unless name (setq name (match-string 1)))
229 (unless value (setq value (match-string 2)))
230 ;; store the directives.
231 (setq muse-publish-presplit-directive-store
232 (format "%s#%s %s\n"
233 muse-publish-presplit-directive-store
234 name value)))
236 (defun muse-publish-presplit-anchor()
237 "Stores the location and names of anchors"
238 (let ((alist (aget muse-publish-presplit-anchor-location
239 muse-publish-presplit-splitting-file)))
241 (add-to-list 'alist
242 `(,(match-string 2) . ,(match-beginning 2)))
243 (aput 'muse-publish-presplit-anchor-location
244 muse-publish-presplit-splitting-file
245 alist)))
248 ;; ;;(setq muse-publish-split-file-split-values nil)
249 ;; (setq muse-publish-split-file-split-values
250 ;; '(("d:/home/src/ht/home_website/journal-split/journal.muse"
251 ;; ("d:/home/src/ht/home_website/journal-split/journal-20060226" 875 1592)
252 ;; ("d:/home/src/ht/home_website/journal-split/journal-20060228" 417 874)
253 ;; ("d:/home/src/ht/home_website/journal-split/journal-20060303" 27 416)
254 ;; ("d:/home/src/ht/home_website/journal-split/journal-20060220" 1593 2957)
255 ;; ("d:/home/src/ht/home_website/journal-split/journal-all" 1 2957)
256 ;; ("d:/home/src/ht/home_website/journal-split/journal" 1 2957))))
258 ;; ;; muse-publish-presplit-anchor-location's value is shown below.
259 ;; ;; Value:
260 ;; ;; (setq muse-publish-presplit-anchor-location nil)
261 ;; (setq muse-publish-presplit-anchor-location
262 ;; '(("d:/home/src/ht/home_website/journal-split/journal.muse"
263 ;; ("semantic_enrichment" 1642)
264 ;; ("title" 2))
265 ;; ("d:/home/src/ht/home_website/journal-split/simple.muse"
266 ;; ("anchor7" 189)
267 ;; ("anchor3" 173)
268 ;; ("anchor2" 162)
269 ;; ("simple_anchor" 15))))
271 ;; get the anchor locations
272 ;; (muse-publish-presplit-publish file)
273 ;; get the split locations
275 ;; (muse-publish-split-file file))
277 (defun test1()
278 (interactive)
279 (message "%s" (muse-publish-split-file-for-anchor
280 "d:/home/src/ht/home_website/journal-split/journal.muse"
281 "semantic_enrichment")))
283 (defun muse-publish-split-file-for-anchor (base-file anchor)
284 "Given a base file and an anchor, return the file into which
285 the anchor will be output"
286 (let* (
287 ;; this should be an alist, keyed on the anchor, valued on
288 ;; either numbers, or file-locations
289 (anchor-alist
291 (aget muse-publish-presplit-anchor-location
292 base-file)
293 (progn
294 (muse-publish-presplit-publish base-file)
295 (aget muse-publish-presplit-anchor-location
296 base-file))))
298 ;; this should be a list of triples: file, start, stop.
299 (split-list
300 (or (aget muse-publish-split-file-split-values
301 base-file)
302 (muse-publish-split-file base-file)))
303 ;; this should be either the position of the anchor in a
304 ;; buffer as an int, or a output file location
305 (anchor-position-or-location
306 (aget anchor-alist anchor))
307 ;; this should definately be the output file location
308 (anchor-output
309 (if (stringp anchor-position-or-location)
310 anchor-position-or-location
311 (car
312 (delete nil
313 (mapcar
314 (lambda(elem)
315 (if (and
316 (> anchor-position-or-location
317 (cadr elem))
318 (< anchor-position-or-location
319 (caddr elem)))
320 (car elem)))
321 split-list))))))
323 ;; ensure that we put the location back into the stored list so
324 ;; that we don't have to work it out next time
325 (aput
326 'anchor-alist anchor anchor-output)
328 (aput 'muse-publish-presplit-anchor-location
329 base-file anchor-alist)
331 (file-name-nondirectory anchor-output)))
334 ;; this is an example of why I would want to use the code.
335 (muse-derive-style "journal-html-by-day" "journal-html"
336 :split 'muse-journal-split-by-entry)
338 (muse-derive-style "journal-html-by-month" "journal-html"
339 :split 'muse-journal-split-by-month)
342 (defun muse-journal-split-by-entry (file)
343 "Split a muse journal file into days"
344 (muse-with-temp-buffer
345 (insert-file-contents file)
346 (let* ((split-alist)
347 (root-name (file-name-sans-extension file))
348 (split-regexp "^\\* \\([0-9]\\{8\\}\\)")
349 (current-position
350 (if (re-search-forward split-regexp nil t)
351 (- (match-beginning 0) 1)))
352 (entry-name (match-string 1))
353 (entry-location (match-beginning 0)))
354 (while (re-search-forward split-regexp nil t)
355 (setq entry-location (match-beginning 0))
356 (add-to-list 'split-alist
357 `(,(concat root-name "-" entry-name)
358 ,current-position
359 ,(- entry-location 1)))
360 (setq current-position entry-location
361 entry-name (match-string 1)))
363 (add-to-list 'split-alist
364 `(,(concat root-name "-" entry-name)
365 ,current-position
366 ,(point-max))
369 (add-to-list 'split-alist
370 `(,root-name
371 ,(cadr (car (last split-alist)))
372 ,(caddr (car (last split-alist))))
375 (add-to-list 'split-alist
376 `(,(concat root-name "-all")
377 1 ,(point-max))
378 t))))
380 (defun muse-journal-split-by-month (file)
381 "Split a muse journal file into months.
383 This function makes the assumption that the entries are sorted. If
384 it isn't then it some of the entries will appear not to be published."
385 (muse-with-temp-buffer
386 (insert-file-contents file)
387 (let* ((split-alist)
388 (root-name (file-name-sans-extension file))
389 (split-regexp (concat "^\\* \\([0-9]\\{4\\}\\)\\([0-9]\\{2\\}\\)"
390 "\\([0-9]\\{2\\}\\)"))
391 (current-position
392 (if (re-search-forward split-regexp nil t)
393 (- (match-beginning 0) 1)))
394 (entry-name (muse-journal-split-by-month-name))
395 (entry-location (match-beginning 0)))
397 ;; for a new entry, if the name has changed
398 (while (and (re-search-forward split-regexp nil t)
399 (not (equal entry-name
400 (muse-journal-split-by-month-name))))
401 (setq entry-location (match-beginning 0))
402 (add-to-list 'split-alist
403 `(,(concat root-name "-" entry-name)
404 ,current-position
405 ,(- entry-location 1)))
407 (setq current-position entry-location
408 entry-name (muse-journal-split-by-month-name)))
410 ;; add last entry
411 (add-to-list 'split-alist
412 `(,(concat root-name "-" entry-name)
413 ,current-position
414 ,(point-max)))
416 ;; add some duplicate entries in. Add these last, so that
417 ;; anchors go to one of the others.
420 ;; duplicate last entry as current
421 (add-to-list 'split-alist
422 `(,root-name
423 ,(cadr (car (last split-alist)))
424 ,(caddr (car (last split-alist))))
427 ;; add all entry
428 (add-to-list 'split-alist
429 `(,(concat root-name "-all")
430 1 ,(point-max))
431 t))))
433 (defun muse-journal-split-by-month-name()
434 (concat (match-string 1)
435 (match-string 2)))
438 (defun test2()
439 (interactive)
440 (message "%s" (muse-journal-split-by-entry "journal.muse")))
443 (provide 'muse-split)
444 ;; muse-split.el ends here