[project @ sridhar.ratna@gmail.com-20071015190851-bmgoaohgpcl1apvc]
[orgweb.git] / org2json.py
blob2251114a6d0810f39209b4b5aefc432b03c82c14
1 # org2json
2 # parse an org-mode file and convert it to JSON
4 __author__ = "Sridhar Ratnakumar <http://nearfar.org/>"
6 import re
7 from itertools import count
9 from simplejson import dumps
def rindexed(seq):
    """Pair each element of *seq* with its index, walking from the end.

    *seq* must support ``len()`` and ``reversed()``.

    >>> list(rindexed([5, 7, 9]))
    [(2, 9), (1, 7), (0, 5)]
    """
    # Count down from the last valid index to 0, in step with reversed(seq).
    positions = range(len(seq) - 1, -1, -1)
    return zip(positions, reversed(seq))
def reverse(iter):
    """Return a new list holding the items of *iter* in reverse order.

    Accepts any iterable (including one-shot generators); the argument
    itself is never modified.
    """
    # Materialize first so generators work, then take a reversed copy.
    return list(iter)[::-1]
# A heading is one or more stars followed by a single space, then the title.
_HEADING_PAT = re.compile(r"^(\*+) (.*)")
# Trailing org tag block such as ``:TAG1:TAG2:``, optionally preceded by a
# title and the whitespace separating it from the tags.
_TAGS_PAT = re.compile(r"^(?:(.*?)\s+)?(:(?:[\w@#%]+:)+)\s*$")


def _split_heading(line):
    r"""Split one line into ``(level, title, tags)``; None if not a heading.

    >>> _split_heading("*** Foo Bar :TAG1:TAG2:")
    (3, 'Foo Bar', ['TAG1', 'TAG2'])
    >>> _split_heading("** write org-mode tutorial")
    (2, 'write org-mode tutorial', ())
    >>> _split_heading("*bold* text is not a heading") is None
    True
    """
    heading = _HEADING_PAT.match(line)
    if heading is None:
        return None
    level, title = len(heading.group(1)), heading.group(2)
    tags = ()
    # Tags are looked for in the title only, so the stars never leak into it.
    tagged = _TAGS_PAT.match(title)
    if tagged is not None:
        title = tagged.group(1) or ''
        tags = tagged.group(2).strip(':').split(':')
    return level, title, tags


def org2py(orgtext):
    r"""Parse org-mode text and return its outline as nested Python data.

    Returns a list of top-level nodes in file order.  Every node is a dict
    with the keys:

    * ``'text'``     -- the heading title (stars and tags removed) followed
      by the body lines that belong to the heading, joined with ``'\n'``
    * ``'children'`` -- list of nodes for the nested sub-headings
    * ``'tags'``     -- list of tag names, or ``()`` when the heading has none

    Lines before the first heading are ignored.  A line that starts with
    ``*`` but is not "stars + space" (for example ``*bold*``) is treated as
    body text.  Any nesting depth is supported, and a heading always
    attaches to the nearest preceding heading with fewer stars; a heading
    with no such ancestor becomes a top-level node.

    >>> tree = org2py("* Projects :work:\n** write tutorial\n* Whims")
    >>> [n['text'] for n in tree]
    ['Projects', 'Whims']
    >>> tree[0]['tags']
    ['work']
    >>> tree[0]['children'][0]['text']
    'write tutorial'
    """
    # Pass 1: group the lines into sections, one per heading, in file order.
    sections = []  # (level, title, tags, body_lines)
    for line in orgtext.splitlines():
        heading = _split_heading(line)
        if heading is not None:
            level, title, tags = heading
            sections.append((level, title, tags, []))
        elif sections:
            # Body line of the most recent heading.
            sections[-1][3].append(line)

    # Pass 2: build the tree, keeping the chain of currently open ancestors.
    roots = []
    ancestors = []  # (level, node), strictly increasing level
    for level, title, tags, body in sections:
        node = {
            'text': '\n'.join([title] + body),
            'children': [],
            'tags': tags,
        }
        # Close every heading that is not shallower than this one.
        while ancestors and ancestors[-1][0] >= level:
            ancestors.pop()
        if ancestors:
            ancestors[-1][1]['children'].append(node)
        else:
            roots.append(node)
        ancestors.append((level, node))
    return roots
def org2json(orgtext):
    """Parse org-mode text and return the outline serialized as JSON.

    The text is parsed with ``org2py`` and the resulting structure is
    passed to ``dumps``.
    """
    outline = org2py(orgtext)
    return dumps(outline)
if __name__ == '__main__':
    # Run the module's doctests, then pretty-print the parsed sample file.
    from doctest import testmod
    from pprint import pprint

    testmod()
    # Parenthesized single-argument print behaves the same on Python 2 and 3.
    print('--demo--')
    # Context manager so the sample file handle is closed deterministically.
    with open('sample.org') as orgfile:
        pprint(org2py(orgfile.read()))