fix
[camlunity.git] / simple_markup.ml
blobb461e8c09bd5d5b2fbe56a878af483b584a502e0
1 (* Copyright (C) 2009 Mauricio Fernandez <mfp@acm.org> *)
2 open Printf
3 open ExtString
4 open ExtList
6 (* TYPE_CONV_PATH "Simple_markup" *)
8 type ref = { src : string; desc : string }
10 type paragraph =
11 Normal of par_text
12 | Pre of string * string option
13 | Heading of int * par_text
14 | Quote of paragraph list
15 | Ulist of paragraph list * paragraph list list
16 | Olist of paragraph list * paragraph list list
18 and par_text = text list
20 and text =
21 Text of string
22 | Emph of string
23 | Bold of string
24 | Struck of par_text
25 | Code of string
26 | Link of href
27 | Anchor of string
28 | Image of img_ref
30 and href = { href_target : string; href_desc : string; }
32 and img_ref = { img_src : string; img_alt : string; }
34 (* and par_list = paragraph list with sexp *)
36 (* class fold = Camlp4Filters.GenerateFold.generated *)
38 type parse_state = { max : int; current : Buffer.t; fragments : text list; }
40 (* let string_of_paragraph p = Sexplib.Sexp.to_string_hum (sexp_of_paragraph p) *)
41 (* let string_of_paragraphs ps = Sexplib.Sexp.to_string_hum (sexp_of_par_list ps) *)
43 let indentation ?(ts=8) s =
44 let rec loop n indent max =
45 if n >= max then indent
46 else match s.[n] with
47 ' ' -> loop (n + 1) (indent + 1) max
48 | '\t' -> loop (n + 1) (indent + 8) max
49 | _ -> indent
50 in loop 0 0 (String.length s)
52 let unescape s =
53 let b = Buffer.create (String.length s) in
54 let len = String.length s in
55 let rec loop i =
56 if i >= len then Buffer.contents b
57 else match s.[i] with
58 '\\' when i < len - 1 -> Buffer.add_char b s.[i+1]; loop (i + 2)
59 | c -> Buffer.add_char b c; loop (i + 1)
60 in loop 0
62 let unescape_slice s ~first ~last =
63 unescape (String.strip (String.slice ~first ~last s))
65 let snd_is s c = String.length s > 1 && s.[1] = c
66 let snd_is_space s = snd_is s ' ' || snd_is s '\t'
68 let collect f x =
69 let rec loop acc = match f x with
70 None -> List.rev acc
71 | Some y -> loop (y :: acc)
72 in loop []
74 let push_remainder ?(first=2) indent s e =
75 let s = String.slice ~first s in
76 let s' = String.strip s in
77 Enum.push e (indent + first + indentation s, s', s' = "")
79 let adds = Buffer.add_string
81 let addc = Buffer.add_char
83 let new_fragment () = Buffer.create 8
85 let push_current st =
86 if Buffer.length st.current > 0 then
87 Text (Buffer.contents st.current) :: st.fragments
88 else st.fragments
90 let rec read_paragraph ?(skip_blank=true) indent e = match Enum.peek e with
91 None -> None
92 | Some (indentation, line, isblank) -> match isblank with
93 true ->
94 Enum.junk e;
95 if skip_blank then read_paragraph indent e else None
96 | false ->
97 if indentation < indent then
98 None
99 else begin
100 Enum.junk e;
101 read_nonempty indentation e line
104 and skip_blank_line e = match Enum.peek e with
105 None | Some (_, _, false) -> ()
106 | Some (_, _, true) -> Enum.junk e; skip_blank_line e
108 and read_nonempty indent e s = match s.[0] with
109 '!' -> read_heading s
110 | '*' when snd_is_space s -> push_remainder indent s e; read_ul indent e
111 | '#' when snd_is_space s -> push_remainder indent s e; read_ol indent e
112 | '{' when snd_is s '{' -> read_pre (String.slice s ~first:2) e
113 | '>' when snd_is_space s || s = ">" ->
114 (* last check needed because "> " becomes ">" *)
115 Enum.push e (indent, s, false); read_quote indent e
116 | _ -> Enum.push e (indent, s, false); read_normal e
118 and read_heading s =
119 let s' = String.strip ~chars:"!" s in
120 let level = String.length s - String.length s' in
121 Some (Heading (level, parse_text s'))
123 and read_ul indent e =
124 read_list
125 (fun fst others -> Ulist (fst, others))
126 (fun s -> snd_is_space s && s.[0] = '*')
127 indent e
129 and read_ol indent e =
130 read_list
131 (fun fst others -> Olist (fst, others))
132 (fun s -> snd_is_space s && s.[0] = '#')
133 indent e
135 and read_list f is_item indent e =
136 let read_item indent ps = collect (read_paragraph (indent + 1)) e in
137 let rec read_all fst others =
138 skip_blank_line e;
139 match Enum.peek e with
140 | Some (indentation, s, _) when indentation >= indent && is_item s ->
141 Enum.junk e;
142 push_remainder indentation s e;
143 read_all fst (read_item indentation [] :: others)
144 | None | Some _ -> f fst (List.rev others)
145 in Some (read_all (read_item indent []) [])
147 and read_pre kind e =
148 let kind = match kind with "" -> None | s -> Some s in
149 let re = Str.regexp "^\\\\+}}$" in
150 let unescape = function
151 s when Str.string_match re s 0 -> String.slice ~first:1 s
152 | s -> s in
153 (* don't forget the last \n *)
154 let ret ls = Some (Pre (String.concat "\n" (List.rev ("" :: ls)), kind)) in
155 let rec read_until_end fstindent ls = match Enum.get e with
156 None | Some (_, "}}", _) -> ret ls
157 | Some (indentation, s, _) ->
158 let spaces = String.make (max 0 (indentation - fstindent)) ' ' in
159 read_until_end fstindent ((spaces ^ unescape s) :: ls)
160 in match Enum.get e with
161 None | Some (_, "}}", _) -> ret []
162 | Some (indentation, s, _) -> read_until_end indentation [s]
164 and read_quote indent e =
165 let push_and_finish e elm = Enum.push e elm; raise Enum.No_more_elements in
166 let next_without_lt e = function
167 | (_, _, true) as line -> push_and_finish e line
168 | (n, s, false) as line ->
169 if n < indent || s.[0] <> '>' then
170 push_and_finish e line
171 else
172 let s = String.slice ~first:1 s in
173 let s' = String.strip s in
174 (String.length s - String.length s', s', s' = "")
176 in match collect (read_paragraph 0) (Enum.map (next_without_lt e) e) with
177 [] -> None
178 | ps -> Some (Quote ps)
180 and read_normal e =
181 let rec gettxt ls =
182 let return () = String.concat " " (List.rev ls) in
183 match Enum.peek e with
184 None | Some (_, _, true) -> return ()
185 | Some (_, l, _) -> match l.[0] with
186 '!' | '*' | '#' | '>' when snd_is_space l -> return ()
187 | '{' when snd_is l '{' -> return ()
188 | _ -> Enum.junk e; gettxt (l :: ls) in
189 let txt = gettxt [] in
190 Some (Normal (parse_text txt))
192 and parse_text s =
193 scan
195 { max = String.length s;
196 fragments = [];
197 current = new_fragment (); }
200 (* scan s starting from n, upto max (exclusive) *)
201 and scan s st n =
202 let max = st.max in
203 if n >= max then List.rev (push_current st)
205 else match s.[n] with
206 | '`' ->
207 delimited (fun ~first ~last -> Code (unescape_slice s ~first ~last)) "`"
208 s st n
209 | '*' ->
210 delimited (fun ~first ~last -> Bold (unescape_slice s ~first ~last)) "*"
211 s st n
212 | '_' ->
213 delimited (fun ~first ~last -> Emph (unescape_slice s ~first ~last)) "__"
214 s st n
215 | '=' ->
216 delimited
217 (fun ~first ~last ->
218 Struck (scan s
219 { max = last; fragments = []; current = new_fragment (); }
220 first))
221 "==" s st n
222 | '!' when matches_at s ~max n "![" ->
223 maybe_link
224 "![" (fun ref -> Image { img_src = ref.src; img_alt = ref.desc })
225 s st (n + 2)
226 | '[' ->
227 maybe_link "["
228 (fun ref -> match ref.src, ref.desc with
229 "", "" -> Text ""
230 | "", desc -> Link { href_target = desc; href_desc = desc }
231 | src, "" when src.[0] = '#' -> Anchor (String.slice ~first:1 src)
232 | src, desc -> Link { href_target = ref.src; href_desc = ref.desc})
233 s st (n + 1)
234 | '\\' when (n + 1) < max -> addc st.current s.[n+1]; scan s st (n + 2)
235 | c -> addc st.current c; scan s st (n + 1)
237 (* [delimited f delim first] tries to match [delim] starting from [first],
238 * returns Some (offset of char after closing delim) or None *)
239 and delimited f delim s st first =
240 let max = st.max in
241 let delim_len = String.length delim in
242 let scan_from_next_char () =
243 addc st.current s.[first];
244 scan s st (first + 1)
246 if not (matches_at s ~max first delim) then scan_from_next_char ()
247 else match scan_past ~delim s ~max (first + String.length delim) with
248 Some n ->
249 let chunk = f ~first:(first + delim_len)
250 ~last:(n - String.length delim)
251 in scan s
252 { st with fragments = chunk :: push_current st;
253 current = new_fragment () }
255 | None -> scan_from_next_char ()
257 and maybe_link delim f s st n = match scan_link s ~max:st.max n with
258 None -> adds st.current delim; scan s st n
259 | Some (ref, n) ->
260 scan s
261 { st with fragments = f ref :: push_current st;
262 current = (new_fragment ()) }
265 (* return None if delim not found, else Some (offset of char *after* delim) *)
266 and scan_past ~delim s ~max n =
267 let re = Str.regexp (Str.quote delim) in
268 let rec loop m ~max =
269 if m >= max then None else
270 match (try Some (Str.search_forward re s m) with Not_found -> None) with
271 | Some m when m < max && s.[m-1] <> '\\' -> Some (m + String.length delim)
272 | Some m when m < max -> loop (m + 1) ~max
273 | _ -> None (* no match or >= max *)
274 in loop n ~max
276 (* returns None or offset of char after the reference
277 * (i.e. after closing ')'). *)
278 and scan_link s ~max n = match scan_past ~delim:"]" s ~max n with
279 None -> None
280 | Some end_of_desc ->
281 if end_of_desc >= max then None
282 else match s.[end_of_desc] with
283 '(' ->
284 begin match scan_past ~delim:")" s ~max (end_of_desc + 1) with
285 None -> None
286 | Some end_of_uri ->
287 let ref =
289 desc = unescape_slice s ~first:n ~last:(end_of_desc - 1);
290 src = unescape_slice s
291 ~first:(end_of_desc + 1)
292 ~last:(end_of_uri - 1)
294 in Some (ref, end_of_uri)
296 | _ -> None
298 and matches_at s ~max n delim =
299 let len = String.length delim in
300 if n + len > max then false
301 else
302 let rec loop n m k =
303 if k = 0 then true
304 else if s.[n] = delim.[m] then loop (n + 1) (m + 1) (k - 1)
305 else false
306 in loop n 0 len
308 let parse_enum e =
309 collect (read_paragraph 0)
310 (Enum.map (fun l -> let l' = String.strip l in (indentation l, l', l' = "")) e)
312 let parse_lines ls = parse_enum (List.enum ls)
313 let parse_text s = parse_lines ((Str.split (Str.regexp "\n") s))