org2json.py

# org2json
#  Parse an Org-mode file and convert it to JSON.
   3
   4 __author__ = "Sridhar Ratnakumar <http://nearfar.org/>"
   5
   6 import re
   7 from itertools import count
   8
   9 from simplejson import dumps
  10
  11 def rindexed(seq):
  12     """
  13     >>> l = [5,7,9]
  14     >>> print list(rindexed(l))
  15     [(2, 9), (1, 7), (0, 5)]
  16     """
  17     return zip(
  18         range(len(seq))[::-1],
  19         reversed(seq))
  20
  21 def reverse(iter):
  22     l = list(iter)
  23     l.reverse()
  24     return l
  25
  26
  27 def org2py(orgtext):
  28     """Parse the given org file text and return the Python data structure
  29
  30     >>> j = org2py(open('sample.org').read())
  31     >>> j=list(j)
  32     >>> j[-1]['text']
  33     'Projects'
  34     >>> j[-2]['text']
  35     'Whims'
  36     >>> j[-3]['text']
  37     'Online Stuff'
  38     >>> j[-4]['text']
  39     'Stuff'
  40     >>> j[-5]['text']
  41     'Travels'
  42     """
  43     lines = orgtext.splitlines()
  44     def e(i1, i2):
  45         return '\n'.join(lines[i1:i2])
  46
  47     def by_star():
  48         last_index = len(lines)
  49         for index, line in rindexed(lines):
  50             if line.startswith("*"):
  51                 yield [index, e(index, last_index)]
  52                 last_index = index
  53
  54     def hier(items):
  55         STARS_PAT = re.compile(r"^(\**) (.*)", re.DOTALL)
  56         TAGS_PAT  = re.compile(r"\s((\w)*(:(\w)*)*:)$")
  57         def splititem(line):
  58             """
  59             >>> splititem("*** Foo Bar :TAG1:TAG2:")
  60             (3, 'Foo Bar', ('TAG1, 'TAG2'))
  61             >>> splititem("** write org-mode tutorial")
  62             (2, 'write org-mode tutorial')
  63             """
  64             match = STARS_PAT.match(line)
  65             stars, text = match.group(1), match.group(2)
  66             match = TAGS_PAT.search(text)
  67             tags = match and match.group(1).strip(':').split(':') or ()
  68             return len(stars), text, tags
  69
  70         def node(text, children, tags):
  71             return {'text': text, 'children': children, 'tags': tags}
  72
  73         istack = [[], [], [], [], [], [], [], [], []] # and so on ...
  74
  75         pn = None
  76         for index, text in items:
  77             n, text, tags = splititem(text)
  78             assert n>0
  79
  80             if n < pn:
  81                 # up to the parent
  82                 istack[n].append(node(text, reverse(istack[pn]), tags))
  83                 istack[pn] = []
  84             else:
  85                 # previous sibling OR child of one of the top nodes
  86                 istack[n].append(node(text, [], tags))
  87             pn = n
  88
  89         return reverse(istack[1])
  90
  91     return hier(by_star())
  92
  93 def org2json(orgtext):
  94     return dumps(org2py(orgtext))
  95
  96
  97 if __name__ == '__main__':
  98     from doctest import testmod
  99     testmod()
 100     print '--demo--'
 101     from pprint import pprint
 102     pprint( org2py(open('sample.org').read()) )