From 165a27a6f1cc8985a8a72c17debabfa554f00e1b Mon Sep 17 00:00:00 2001 From: Kagamin Date: Sat, 14 Feb 2009 14:51:57 +0300 Subject: [PATCH] plugins, parsing threads list with BeautifulSoup --- server/base_plugin.py | 18 ++++++++++++++ server/chanspyd | 9 +++---- server/htdocs/index.html | 2 +- server/htdocs/styles/style.css | 21 ++++++++++++++--- server/html_generation.py | 24 ++++++++++++++----- server/plugins.py | 26 +++++++++++++++++++++ server/plugins/plugin_wakaba.py | 52 +++++++++++++++++++++++++++++++++++++++++ server/server.py | 43 +++++++++++++++++++++++++++------- 8 files changed, 173 insertions(+), 22 deletions(-) create mode 100644 server/base_plugin.py create mode 100644 server/plugins.py create mode 100644 server/plugins/plugin_wakaba.py diff --git a/server/base_plugin.py b/server/base_plugin.py new file mode 100644 index 0000000..644fc95 --- /dev/null +++ b/server/base_plugin.py @@ -0,0 +1,18 @@ +import urllib + +class BasePlugin(): + '''Base plugin class. Replacing by users plugins.''' + + def get_threads_list(self, chan_name, board_name, chan_settings): + return [] + + #---------------------------------------------# + def get_uri(self, uri): + try: + u = urllib.urlopen(uri) + s = u.read() + u.close() + except: + return None + else: + return s diff --git a/server/chanspyd b/server/chanspyd index 75327bb..0db3e9c 100755 --- a/server/chanspyd +++ b/server/chanspyd @@ -20,16 +20,16 @@ import sys import os +os.chdir(sys.path[0]) import signal import config +import plugins import server -os.chdir(sys.path[0]) - #---------------------------------------------- def main(): - '''Main function. Write pid file and run server.''' + '''Main function. Write pid file, loading plugins and run server.''' if os.path.exists(PID): sys.stderr.write('chanspyd\'s already executed. If not then remove %s.\n' %PID) @@ -45,7 +45,8 @@ def main(): sys.exit(2) signal.signal(signal.SIGTERM, sigTermCB) - server.start_server(settings) + _plugins = plugins.load_plugins() + server.start_server(settings, _plugins) #---------------------------------------------- def help(): diff --git a/server/htdocs/index.html b/server/htdocs/index.html index da3fa67..6f49c32 100644 --- a/server/htdocs/index.html +++ b/server/htdocs/index.html @@ -8,7 +8,7 @@ - + diff --git a/server/htdocs/styles/style.css b/server/htdocs/styles/style.css index 92238ae..cafec9e 100644 --- a/server/htdocs/styles/style.css +++ b/server/htdocs/styles/style.css @@ -3,17 +3,32 @@ html, body { padding: 0; } -div { +div.border { + border: 1px solid red; +} + +div.chans_list, div.boards_list, div.threads_list { text-align: center; } -div a { +div.threads_list { + margin: 5px; + border: 1px solid red; +} + +div.chans_list a, div.boards_list a, div.threads_list a { display: block; text-decoration: none; color: #800000; background: #F0E0D6; } -div a:hover { +div.chans_list a:hover, div.boards_list a:hover, div.threads_list a:hover { background: #FFFFEE; } + +h2.caption { + text-align: center; + margin: 3px; + padding: 0; +} diff --git a/server/html_generation.py b/server/html_generation.py index 3a66ecd..c3c31c8 100644 --- a/server/html_generation.py +++ b/server/html_generation.py @@ -6,8 +6,8 @@ import os from BeautifulSoup import BeautifulSoup # some constants -htdocs_dir = u'htdocs' -template = os.path.join(htdocs_dir, u'list_template.html') +htdocs_dir = 'htdocs' +template = os.path.join(htdocs_dir, 'list_template.html') try: f = open(template, 'r') template_data = f.read() @@ -23,9 +23,9 @@ def get_chans_html(chans_list): body = u'\n' for (chan_name, uri) in chans_list: - body += u'
%s
\n' %(uri, chan_name) + body += u'
%s
\n' %(uri, chan_name) - soup.body.replaceWith(u'%s' %body) + soup.body.replaceWith(u'
\n%s\n
' %body) return unicode(soup)[:-1] #---------------------------------------------# @@ -36,8 +36,20 @@ def get_boards_html(boards_list): body = u'\n' for (board_name, uri) in boards_list: - # TODO: do something with this shit - body += u'
%s
\n' %(uri, board_name) + body += u'
%s
\n' %(uri, board_name) + + soup.body.replaceWith(u'
\n%s\n
' %body) + return unicode(soup)[:-1] + +#---------------------------------------------# +def get_threads_html(threads_list, chan_name, board_name): + '''Generate threads html.''' + + soup = BeautifulSoup(template_data) + + body = u'

%s — %s

\n' %(chan_name, board_name) + for thread_dict in threads_list: + body += u'
%s | %s
\n' %( thread_dict['id'], thread_dict['title'], thread_dict['date']) soup.body.replaceWith(u'%s' %body) return unicode(soup)[:-1] diff --git a/server/plugins.py b/server/plugins.py new file mode 100644 index 0000000..2f1c465 --- /dev/null +++ b/server/plugins.py @@ -0,0 +1,26 @@ +import os +import imp + +# some constants +plugins_dir = 'plugins' + +def load_plugins(): + '''Load plugins.''' + + plugins = {} + + for plugin in os.listdir(plugins_dir): + if plugin.startswith('plugin_') and plugin.endswith('.py'): + plugin = plugin[:-3] + plugin_name = plugin[7:] + file, pathname, description = imp.find_module('%s/%s' %(plugins_dir, plugin)) + + #try: + plugin_object = imp.load_module(plugin, file, pathname, description).Plugin() + #except: + # logging.error('PLUGIN: can\'t load %s' %board_type) + #else: + plugins[plugin_name] = plugin_object + #logging.info('PLUGIN: %s loaded' %board_type) + + return plugins diff --git a/server/plugins/plugin_wakaba.py b/server/plugins/plugin_wakaba.py new file mode 100644 index 0000000..519428b --- /dev/null +++ b/server/plugins/plugin_wakaba.py @@ -0,0 +1,52 @@ +import datetime +from BeautifulSoup import BeautifulSoup + +import re +re_digits = re.compile('[0-9]*') + +from base_plugin import BasePlugin + +class Plugin(BasePlugin): + """Wakaba plugin.""" + + def get_threads_list(self, chan_name, board_name, chan_settings): + data = self.get_uri('%s/%s/' %(chan_settings['base_uri'], board_name)) + threads = [] + if data: + data = data.replace('
', '') + soup = BeautifulSoup(data) + print('### Start findAll ### %s' %datetime.datetime.now()) + # tooooo slow ~0.8s + threads_data = soup.findAll('table', {'id': 'mythread', 'class': 'mythread'}) + print('### End findAll ### %s' %datetime.datetime.now()) + for i in range(1, len(threads_data) - 1): + threads.append(self.parse_thread(threads_data[i])) + threads.append(self.parse_thread(threads_data[2])) + return threads + + def parse_thread(self, soup): + thread_dict = { 'id': '' + , 'date': '' + , 'img_uri': '' + , 'title': '' + , 'author': '' + , 'message': '' + } + + thread_dict['id'] = dict(soup.find('a', {'name': re_digits}).attrs)['name'] # id + label = soup.find('label').contents + thread_dict['date'] = label[len(label) - 1] # date + img_uri = soup.find('a', {'target': '_blank'}) + if img_uri: + thread_dict['img_uri'] = dict(img_uri.attrs)['href'] # img_uri + title = soup.find('span', {'class': 'filetitle'}).contents # title + if title: + thread_dict['title'] = title[0] + author = soup.find('span', {'class': 'postername'}).contents + if author: + thread_dict['author'] = author[0] # author + message = soup.find('blockquote').p + if message: + thread_dict['message'] = message.contents[0] # message + + return thread_dict diff --git a/server/server.py b/server/server.py index 9bfb84f..61d5b97 100644 --- a/server/server.py +++ b/server/server.py @@ -13,19 +13,20 @@ import misc # some constants htdocs_dir = u'htdocs' server = None -settings = {} +settings = None +plugins = None #---------------------------------------------# class MyHandler(BaseHTTPRequestHandler): def do_GET(self): data = None - # actions starts with «/?» + ### actions starts with «/?» if self.path.startswith('/?'): action = self.path[2:] if action == 'settings': data = ('Унью!',) - elif action == 'chans_list': + elif action == 'chans': chans_list = get_chans_list() data = (get_chans_html(chans_list),) else: @@ -34,24 +35,31 @@ class MyHandler(BaseHTTPRequestHandler): path = '%s%s' %(htdocs_dir, self.path) if os.path.isdir(path): path += '/index.html' - # physical file + ### physical file if os.path.exists(path): f = open(path) data = (f.read(), self.get_content_type(path)) f.close() - # /chan/board/thread/ else: print self.path args = filter(None, self.path.split('/')) print args - # /chan/ + ### /chan/ if len(args) == 1: boards_list = get_boards_list(args[0]) if boards_list: data = (get_boards_html(boards_list),) else: self.send_error(501, u'Can\'t find boards on %s' %args[0]) + ### /chan/board/ + elif len(args) == 2: + plugin = get_plugin(args[0], args[1]) + if plugin: + threads_list = plugin.get_threads_list(args[0], args[1], get_chan_settings(args[0])) + data = (get_threads_html(threads_list, args[0], args[1]),) + else: + self.send_error(501, u'Can\'t find plugin for %s/%s/' %(args[0], args[1])) else: self.send_error(404, u'File not found: %s' %path) @@ -67,9 +75,11 @@ class MyHandler(BaseHTTPRequestHandler): return mimetypes.guess_type(filename)[0] or 'application/octet-stream' #---------------------------------------------# -def start_server(_settings): - global server, settings +def start_server(_settings, _plugins): + global server, settings, plugins settings = _settings + plugins = _plugins + server = HTTPServer(('', settings['daemon']['port']), MyHandler) server.serve_forever() @@ -97,3 +107,20 @@ def get_boards_list(_chan_name): boards_list.append((board_name, u'/%s/%s/' %(chan_name, board_name))) return boards_list + +def get_plugin(_chan_name, _board_name): + for chan_item in settings['chans']: + chan_name, chan_settings = chan_item.items()[0] + if _chan_name == chan_name: + for board_item in chan_settings['boards']: + board_name, board_settings = board_item.items()[0] + if _board_name == board_name: + plugin_name = board_settings['type'] + if plugins.has_key(plugin_name): + return plugins[plugin_name] + +def get_chan_settings(_chan_name): + for chan_item in settings['chans']: + chan_name, chan_settings = chan_item.items()[0] + if _chan_name == chan_name: + return chan_settings -- 2.11.4.GIT