App Engine Python SDK version 1.8.9
[gae.git] / python / google / appengine / ext / analytics / stats.py
blobb2ab92cf9a3da25bcacb3516996af95604e70c4d
#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Data structures to represent statistics used by analysis library.

Appstats data is loaded into data structures defined in this code.
URLStats holds information about all requests of a URL path,
URLRequestStats holds information about a specific request,
RPCStats holds data about a specific RPC category for each request.
"""
30 import logging
31 import entity
def _RPCCategory(rpcstatsproto):
    """Compute the category string used to group an RPC in the analysis.

    Grouping RPCs only by service and call name is too coarse: a request
    that runs two kinds of datastore query on different entity kinds would
    have both lumped together. RPCs are therefore summarized by service
    name, call name and a "category". The category is only meaningful for
    RPCs that carry datastore details; every other RPC gets ''.

    Args:
      rpcstatsproto: IndividualRPCStatsProto from Appstats recording which
        represents statistics for a single RPC in a request.

    Returns:
      A string naming the category the RPC belongs to, or '' when category
      information is not relevant to this RPC.
    """
    # Without datastore details there is nothing to categorize on.
    if not rpcstatsproto.has_datastore_details():
        return ''

    call = rpcstatsproto.service_call_name()

    if call == 'datastore_v3.Put':
        # Category is derived from the keys this call wrote.
        return entity.EntityListKind(
            rpcstatsproto.datastore_details().keys_written_list())

    if call in ('datastore_v3.Get', 'datastore_v3.Next'):
        # Both calls are categorized by the keys they read.
        return entity.EntityListKind(
            rpcstatsproto.datastore_details().keys_read_list())

    if call == 'datastore_v3.RunQuery':
        details = rpcstatsproto.datastore_details()
        # Queries without a kind are grouped under the 'NoKind' placeholder.
        kind = details.query_kind() if details.has_query_kind() else 'NoKind'
        # Ancestor queries get a suffix so they are reported separately.
        ancestor = '_ANC' if details.has_query_ancestor() else ''
        return '%s%s' % (kind, ancestor)

    # Any other call that happens to carry datastore details is uncategorized.
    return ''
class RPCStats(object):
    """Statistics associated with each RPC call category for a request.

    For each RPC call category associated with a URL request, track the
    number of calls and the total time spent summed across all calls. For
    datastore related RPCs, also track the entity keys accessed (read,
    written, and failed Get lookups).

    Attributes:
      servicecallname: Service call name shared by the RPCs in this category,
        e.g. 'datastore_v3.Get'.
      category: Category string computed by _RPCCategory ('' if not relevant).
      time: Sum of int(duration_milliseconds()) over all recorded calls.
      numcalls: Number of calls recorded in this category.
      keys_read: Keys reported as read, accumulated over all calls.
      keys_written: Keys reported as written, accumulated over all calls.
      keys_failed_get: Keys read by 'datastore_v3.Get' calls whose matching
        successful-fetch flag was false.
    """

    # Short labels for datastore calls; other call names are used verbatim.
    _ABBRV = {
        'datastore_v3.Put': 'ds.Put',
        'datastore_v3.RunQuery': 'ds.Query',
        'datastore_v3.Get': 'ds.Get',
        'datastore_v3.Next': 'ds.Next',
    }

    def __init__(self, rpcstatsproto):
        """Initialize stats first time RPC called for that URL request.

        Args:
          rpcstatsproto: IndividualRPCStatsProto from Appstats recording which
            represents statistics for a single RPC in a request.
        """
        self.servicecallname = rpcstatsproto.service_call_name()
        self.category = _RPCCategory(rpcstatsproto)
        self.time = 0
        self.numcalls = 0
        self.keys_read = []
        self.keys_written = []
        self.keys_failed_get = []
        # The proto that creates the category is also its first recorded call.
        self.Incr(rpcstatsproto)

    def Incr(self, rpcstatsproto):
        """Update stats every time RPC called for that URL request.

        Increment number of calls to RPCs in this category by 1 and increment
        total time spent in this RPC category by time taken by this particular
        RPC. Augment the entities read, written and missed by this RPC
        category with the entities read, written and missed by the RPC.

        Args:
          rpcstatsproto: IndividualRPCStatsProto from Appstats recording which
            represents statistics for a single RPC in a request.
        """
        self.time += int(rpcstatsproto.duration_milliseconds())
        self.numcalls += 1
        if not rpcstatsproto.has_datastore_details():
            return

        details = rpcstatsproto.datastore_details()
        keys_read = details.keys_read_list()
        self.keys_read.extend(keys_read)
        self.keys_written.extend(details.keys_written_list())

        if self.servicecallname == 'datastore_v3.Get':
            hits = details.get_successful_fetch_list()
            # Pair each fetch flag with its key. zip() stops at the shorter
            # list, so a record whose two lists differ in length cannot raise
            # IndexError the way positional indexing would.
            self.keys_failed_get.extend(
                key for hit, key in zip(hits, keys_read) if not hit)

    def GetLabel(self):
        """Get label used to refer to RPC category in graphs.

        Returns:
          The abbreviated call name (or the full call name when no
          abbreviation exists), followed by '_<category>' when the category
          is non-empty.
        """
        label = RPCStats._ABBRV.get(self.servicecallname, self.servicecallname)
        if self.category:
            label = '%s_%s' % (label, self.category)
        return label

    def Match(self, rpcstatsproto):
        """Checks if an RPC belongs to the same category as current.

        Args:
          rpcstatsproto: IndividualRPCStatsProto from Appstats recording which
            represents statistics for a single RPC in a request.

        Returns:
          True or False. True indicates the RPC belongs to same category
          as current one. False indicates otherwise.
        """
        # Compare call names first so the category is only computed when
        # the names already agree.
        if rpcstatsproto.service_call_name() != self.servicecallname:
            return False
        return _RPCCategory(rpcstatsproto) == self.category
class URLRequestStats(object):
    """Statistics associated with each URL request.

    For each URL request, keep track of list of RPCs, statistics
    associated with each RPC, and total response time for that
    URL request.

    Attributes:
      rpcstatslist: One RPCStats per RPC category seen in the request.
      timestamp: Request start time, start_timestamp_milliseconds() * 0.001.
      totalresponsetime: int(duration_milliseconds()) of the request.
      totalrpctime: Sum of the per-category RPC times, computed once at the
        end of construction.
    """

    def __init__(self, statsproto):
        """Load the statistics of one request from its Appstats record.

        Args:
          statsproto: Appstats record of the request. Its accessors
            start_timestamp_milliseconds(), duration_milliseconds() and
            individual_stats_list() are read.
        """
        self.rpcstatslist = []
        # Scale the millisecond start timestamp down by a factor of 1000.
        self.timestamp = statsproto.start_timestamp_milliseconds() * 0.001
        self.totalresponsetime = int(statsproto.duration_milliseconds())
        for rpcstatsproto in statsproto.individual_stats_list():
            self.AddRPCStats(rpcstatsproto)
        # Snapshot taken after every RPC of the record has been folded in.
        self.totalrpctime = self.TotalRPCTime()

    def TotalRPCTime(self):
        """Compute total time spent in all RPCs.

        Returns:
          Sum of the `time` attribute over every entry of rpcstatslist
          (0 when the list is empty).
        """
        return sum(rpc.time for rpc in self.rpcstatslist)

    def AddRPCStats(self, rpcstatsproto):
        """Update statistics for a given RPC called for that URL request.

        The RPC is added to the first existing category it matches; when no
        category matches, a new RPCStats is created for it.

        Args:
          rpcstatsproto: IndividualRPCStatsProto from Appstats recording which
            represents statistics for a single RPC in a request.
        """
        for rpc in self.rpcstatslist:
            if rpc.Match(rpcstatsproto):
                rpc.Incr(rpcstatsproto)
                return
        # First RPC of its category in this request.
        self.rpcstatslist.append(RPCStats(rpcstatsproto))

    def _IncrementCount(self, key_list, group_flag, freq, action):
        """Helper function to increment entity (group) access counts.

        Args:
          key_list: List of entity keys that were accessed.
          group_flag: Boolean. If True, entity group counts are desired.
            If False, entity counts are desired.
          freq: A dictionary keyed on entity (group) kind and name that
            holds counts for reads, writes and misses to that entity (group).
            It is updated in place.
          action: Whether the access was a 'read', 'write' or 'miss'.
        """
        for key in key_list:
            if group_flag:
                name = entity.EntityGroupName(key)
                kind = entity.EntityGroupKind(key)
            else:
                name = entity.EntityFullName(key)
                kind = entity.EntityKind(key)
            # Kind comes first so statistics can be organized by kind.
            kind_name = '%s,%s' % (kind, name)
            if kind_name not in freq:
                freq[kind_name] = {'read': 0, 'write': 0, 'miss': 0}
            freq[kind_name][action] += 1

    def _CountAccesses(self, group_flag):
        """Tally reads, writes and misses over every RPC category.

        Args:
          group_flag: Boolean. If True, count per entity group. If False,
            count per entity.

        Returns:
          Dictionary keyed on 'kind,name' whose values are dictionaries with
          the keys 'read', 'write' and 'miss'.
        """
        freq = {}
        for rpcstats in self.rpcstatslist:
            self._IncrementCount(rpcstats.keys_read, group_flag, freq, 'read')
            self._IncrementCount(
                rpcstats.keys_written, group_flag, freq, 'write')
            self._IncrementCount(
                rpcstats.keys_failed_get, group_flag, freq, 'miss')
        return freq

    def EntityGroupCount(self):
        """Computes reads/writes/failed gets to each entity group for that request.

        Returns:
          freq: Dictionary keyed on entity group. Key is of the form
            'entitygroupkind,entitygroupname' which allows organizing
            statistics of entity groups by their kind. Value is an inner
            dictionary with 3 keys: 'read', 'write', and 'miss'. Value of
            each inner dictionary item is the number of reads/writes/failed
            gets to that entity group for the request.
        """
        return self._CountAccesses(True)

    def EntityCount(self):
        """Computes number of reads/writes/failed gets to each entity.

        Returns:
          freq: Dictionary keyed on entity. The key is of the form
            "entitykind,entityfullname" which allows organizing statistics
            of entities by their kind. Value is an inner dictionary with the
            keys 'read', 'write' and 'miss' holding the number of reads,
            writes and failed gets to that entity for the request.
        """
        return self._CountAccesses(False)
class URLStats(object):
    """Statistics associated with a given URL.

    For each request of that URL, keep track of statistics associated
    with that request such as response time, RPCs called, and
    statistics associated with the RPC.

    Attributes:
      url: The URL these statistics belong to.
      urlrequestlist: One URLRequestStats per request added via AddRequest.
    """

    def __init__(self, url):
        """Create an empty statistics holder for `url`."""
        self.url = url
        self.urlrequestlist = []

    def AddRequest(self, statsproto):
        """Add stats about new request to that URL.

        Args:
          statsproto: Appstats record of the request; it is wrapped in a new
            URLRequestStats.
        """
        self.urlrequestlist.append(URLRequestStats(statsproto))

    def GetResponseTimeList(self):
        """Returns list of response times across all requests of URL."""
        return [request.totalresponsetime for request in self.urlrequestlist]

    def GetTotalRPCTimes(self):
        """Returns list of total RPC times across all requests of URL."""
        return [request.totalrpctime for request in self.urlrequestlist]

    def _Count(self, group_flag):
        """Helper function to count accesses to entities (entity groups).

        Args:
          group_flag: Boolean. If true, count entity groups. If false, count
            entities.

        Returns:
          Dictionary keyed on names of entities (entity groups) with values
          corresponding to their access counts, i.e. dictionaries with the
          keys 'read', 'write' and 'miss' summed over all requests.
        """
        freq_total = {}
        for request in self.urlrequestlist:
            if group_flag:
                freq_request = request.EntityGroupCount()
            else:
                freq_request = request.EntityCount()
            for name, freq in freq_request.items():
                if name not in freq_total:
                    freq_total[name] = {'read': 0, 'write': 0, 'miss': 0}
                totals = freq_total[name]
                for action in ('read', 'write', 'miss'):
                    totals[action] += freq[action]
        return freq_total

    def EntityGroupCount(self):
        """Get reads/writes/failed gets to each entity group over all URL requests.

        Returns:
          freq_total: Dict keyed on entity group, with value being
            count of reads/writes/failed gets to that entity group across
            all requests.
        """
        return self._Count(True)

    def EntityCount(self):
        """Computes reads/writes/failed gets to each entity across all URL requests.

        Returns:
          freq_total: Dict keyed on entity name, with value being the count
            of reads/writes/failed gets to that entity across all requests.
        """
        return self._Count(False)

    def Dump(self):
        """Dumps URL statistics to INFO/DEBUG logs for debugging.

        Per-URL, per-request and per-RPC summaries are logged at INFO. The
        individual keys, together with the header line introducing each key
        list, are logged at DEBUG.
        """
        logging.info('URL: %s', self.url)
        for urlrequest in self.urlrequestlist:
            logging.info('Resptime: %d', urlrequest.totalresponsetime)
            for rpc in urlrequest.rpcstatslist:
                logging.info('%s %s %d %d read:%d written:%d failedgets:%d',
                             rpc.servicecallname,
                             rpc.category,
                             rpc.time,
                             rpc.numcalls,
                             len(rpc.keys_read),
                             len(rpc.keys_written),
                             len(rpc.keys_failed_get))
                logging.debug('Keys Read')
                for key in rpc.keys_read:
                    logging.debug('%s ', entity.EntityFullName(key))
                logging.debug('Keys Written')
                for key in rpc.keys_written:
                    logging.debug('%s ', entity.EntityFullName(key))
                # Logged at DEBUG like the other two headers, so an
                # INFO-level log never shows a header with no keys under it.
                logging.debug('Keys Failed Get')
                for key in rpc.keys_failed_get:
                    logging.debug('%s ', entity.EntityFullName(key))