k2test.py

   1 # -*- coding: utf-8 -*-
   2
   3 """Module for testing kanjidic2.
   4
   5 Added when testing on Debian Lenny (stable).  The latest Python is
   6 2.5.2, which seems to not run the kanjidic2 console tester due to
   7 relative import problems.
   8
   9 """
  10
  11 from __future__ import absolute_import
  12
  13 from jbparse.kanjidic2 import Parser, encode_or_else
  14 import os
  15
  16
  17 if __name__ == "__main__":
  18     import sys, locale
  19     # This is horrid, but it seems like in Python 2.x there doesn't
  20     # exist an alternative accepted method that's transparent to the
  21     # typical end user...
  22     reload(sys)
  23     sys.setdefaultencoding(locale.getpreferredencoding())
  24
  25     try:
  26         dfname, args = sys.argv[1], sys.argv[2:]
  27         assert args
  28     except (IndexError, AssertionError):
  29         print _(u"Syntax: %s <dict_file> <character [...]>") % sys.argv[0]
  30         exit(-1)
  31
  32     try:
  33         p = Parser(dfname)
  34     except Exception, e:
  35         print _(u"Could not create Kanjidic2Parser: %s") % unicode(e)
  36         exit(-1)
  37
  38     if os.name == "nt":
  39         charset = "cp932"
  40     else:
  41         charset = "utf-8"
  42
  43     print u"HEADER"
  44     print u"======"
  45     print p.get_header()
  46     print
  47     print u"%d characters found" % len(p.characters)
  48
  49     for i, kanji in enumerate(p.search("".join(args).decode(charset))):
  50         kstr = encode_or_else(unicode(kanji))
  51         print _(u"Entry %d:\n%s\n") % (i+1, kstr)