[project @ sridhar.ratna@gmail.com-20071015063420-bbogksajnci1ae5b]
[orgweb.git] / org2json.py
blob8b12de9ba3bdaf5d7d883247be3243c38bb3c570
1 # org2json
2 # parse an org-mode file and convert it to JSON
4 __author__ = "Sridhar Ratnakumar <http://nearfar.org/>"
6 import re
7 from itertools import count
9 from simplejson import dumps
def rindexed(seq):
    """Pair every element of *seq* with its index, last element first.

    *seq* must support ``len()`` and ``reversed()``.  The pairs are
    returned as produced by ``zip``.

    >>> list(rindexed([5, 7, 9]))
    [(2, 9), (1, 7), (0, 5)]
    """
    # Indices counted down from the last position to 0, matching the
    # order in which reversed() hands out the elements.
    descending = range(len(seq) - 1, -1, -1)
    return zip(descending, reversed(seq))
def reverse(iter):
    """Return a new list holding the items of *iter* in reverse order.

    Any iterable is accepted; it is consumed once to build the list.
    """
    return list(iter)[::-1]
def org2py(orgtext):
    r"""Parse org-mode text and return its outline as nested Python data.

    A heading is a line made of one or more ``*`` followed by a space.
    Each heading becomes a dict with three keys:

    ``text``
        Everything after the stars and the space, up to (but excluding)
        the next heading.  Body lines are kept, joined with ``\n``, and
        any trailing tags stay part of the text.
    ``children``
        List of nodes for the headings nested below this one, in
        document order.
    ``tags``
        List of tag names found at the end of the heading line
        (``:TAG1:TAG2:``), or an empty tuple when there are none.

    The return value is the list of outermost nodes in document order.
    Lines before the first heading are ignored.  Lines that merely start
    with ``*`` without being headings (e.g. ``*bold* text``) are treated
    as body text.  Nesting depth is unlimited, and a heading is always
    attached to the nearest preceding heading with fewer stars, even
    when levels are skipped.  Headings deeper than level 1 that appear
    before any shallower heading are returned as roots.

    >>> tree = org2py("* Travels\n** Paris :europe:\n* Stuff")
    >>> [n['text'] for n in tree]
    ['Travels', 'Stuff']
    >>> tree[0]['children'][0]['text']
    'Paris :europe:'
    >>> tree[0]['children'][0]['tags']
    ['europe']
    """
    # DOTALL lets group(2) span the body lines that follow the heading.
    heading_pat = re.compile(r"^(\*+) (.*)", re.DOTALL)
    tags_pat = re.compile(r"\s((\w)*(:(\w)*)*:)$")

    lines = orgtext.splitlines()

    # Indices of the real heading lines.  Matching the full "stars + space"
    # shape (rather than just a leading "*") keeps emphasised body lines
    # such as "*bold*" from being mistaken for headings.
    starts = [i for i, line in enumerate(lines) if heading_pat.match(line)]
    # Each heading owns the lines up to the next heading (or end of input).
    ends = starts[1:] + [len(lines)]

    roots = []
    # Chain of currently open headings as (level, node), outermost first.
    # Levels are strictly increasing from bottom to top.
    open_nodes = []

    for start, end in zip(starts, ends):
        match = heading_pat.match('\n'.join(lines[start:end]))
        level = len(match.group(1))
        text = match.group(2)

        # Tags belong to the heading line itself, so look only there;
        # searching the whole block would miss them whenever a body follows.
        headline = lines[start][level + 1:]
        tag_match = tags_pat.search(headline)
        if tag_match:
            tags = tag_match.group(1).strip(':').split(':')
        else:
            tags = ()

        item = {'text': text, 'children': [], 'tags': tags}

        # Close every open heading that is not an ancestor of this one.
        while open_nodes and open_nodes[-1][0] >= level:
            open_nodes.pop()

        if open_nodes:
            open_nodes[-1][1]['children'].append(item)
        else:
            roots.append(item)
        open_nodes.append((level, item))

    return roots
def org2json(orgtext):
    """Convert org-mode text to a JSON string.

    The text is parsed with ``org2py`` and the resulting structure is
    serialised with ``dumps``.
    """
    outline = org2py(orgtext)
    return dumps(outline)
if __name__ == '__main__':
    from doctest import testmod
    from pprint import pprint

    # Run the module's doctests first, then print a sample conversion.
    testmod()
    # Parenthesised form prints the same text on Python 2 and Python 3.
    print('--demo--')
    # Read the sample inside a `with` block so the file handle is closed
    # deterministically instead of being left for the garbage collector.
    with open('sample.org') as sample:
        pprint(org2py(sample.read()))