2 # -*- coding: utf-8 -*-
4 # Server monitoring system
6 # Copyright © 2011 Rodrigo Eduardo Lazo Paz
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
"""Data collection routines.

Collectors are classes which retrieve data from remote
servers. Currently, there is only one class implemented, which makes
parallel requests to http servers.
"""
30 __author__
= "rlazo.paz@gmail.com (Rodrigo Lazo)"
39 from urllib2
import urlopen
, URLError
# Seconds to wait before giving up on a remote request; passed as the
# `timeout` argument of `urlopen` by `_retriever`.
REQUEST_TIMEOUT = 5
45 # TODO: (09/19) add support for string values (Boolean?).
46 # TODO: (09/19) make parsing resilient to spaces in values.
47 # TODO: (09/20) logging facilities
48 def _retriever(source
, timestamp
, output_queue
):
49 """Fetches and parses data from a remote source.
51 Queries a remote http server using the url
53 http://host:port/?format=text
55 It expects a plain text (no html formatting) list of values, with
56 a single data point per line, in the form:
60 - `id@` is an optional variable name prefix that will be discarted
62 - `varname` is the actual varname and must not contain '@' or ' '.
63 - `value` must be a integer(42) or float(3.14159); any space will
66 Collected data will be output to the `output_queue` as a pair:
67 ("ip:port", {"varname": (timestamp, value)}).
70 - `source`: Pair, (ip, port) address of source http server.
71 - `timestamp`: Integer, unix time to use as collected data's timestamp.
72 - `output_queue`: Queue, where to output the collected data, in
76 host_address
= "%s:%s" % (source
[0], source
[1])
77 url
= "http://%s/?format=text" % host_address
80 with contextlib
.closing(urlopen(url
, timeout
=REQUEST_TIMEOUT
)) as fd
:
82 elements
= line
.split()
83 if (len(elements
) == 2):
84 fullvarname
, value
= elements
85 varname
= fullvarname
.split('@')[-1]
88 elif value
== 'false':
94 vars_dict
[varname
] = (timestamp
, value
)
95 output_queue
.put((host_address
, vars_dict
))
97 print "ERROR RETRIEVING %s" % host_address
98 output_queue
.put((None, None))
class HttpCollector(object):
    """Retrieves data from HTTP sources.

    Registers a list of data source servers, which must expose data
    using plain-text format through HTTP, and collects their data. A
    simple use-case is to create a scheduling mechanism that initiates
    repetitive collections and feeds this information into a
    database. For more details about the data formatting, see the
    `HttpCollector.collect` method.
    """

    def __init__(self, sources):
        """Constructor.

        :Parameters:
          - `sources`: Iterable, pairs of (ip, port) of data sources.
        """
        # Materialised so that any iterable (including one-shot
        # generators) can be counted and re-iterated on every collection.
        self._sources = list(sources)
        self._last_collected = {}
        # 0 means that no collection has been performed yet.
        self._timestamp = 0

    def collect(self):
        """Retrieves data from the sources.

        Multi-threaded data retrieval: one thread per source, all
        sharing the same timestamp. For details about data processing
        see `_retriever`. Sources that fail to respond are omitted
        from the result.

        :Returns:
          Dictionary, {"ip:port": {"varname": (timestamp, value)}}.
        """
        results_queue = Queue.Queue()
        timestamp = int(time.time())
        threads = []
        for source in self._sources:
            thread = threading.Thread(target=_retriever,
                                      args=(source, timestamp, results_queue))
            thread.start()
            threads.append(thread)
        result = {}
        # Each retriever puts exactly one item on the queue, so one
        # blocking get() per started thread drains all the results.
        for _ in threads:
            host_address, data = results_queue.get()
            if host_address is not None:
                result[host_address] = data
        self._last_collected = result
        self._timestamp = timestamp
        return result

    def get_collected_data(self):
        """Returns a copy of the last collected data.

        The outer dictionary and every per-host dictionary are copied,
        so callers may modify the returned value without affecting the
        collector's state.

        See `HttpCollector.collect` for details about the returned data format.
        """
        return dict((host_address, dict(data))
                    for host_address, data in self._last_collected.items())

    def get_last_collection_timestamp(self):
        """Timestamp of the latest collection.

        Returned value is the unix time (as integer) of the latest
        collection, or 0 if no collection was performed.
        """
        return self._timestamp