App Engine Python SDK version 1.8.9
[gae.git] / python / google / appengine / datastore / datastore_stats_generator.py
blob81ed3deb86fd5287ce866eccc310307f37668cc9
1 #!/usr/bin/env python
3 # Copyright 2007 Google Inc.
5 # Licensed under the Apache License, Version 2.0 (the "License");
6 # you may not use this file except in compliance with the License.
7 # You may obtain a copy of the License at
9 # http://www.apache.org/licenses/LICENSE-2.0
11 # Unless required by applicable law or agreed to in writing, software
12 # distributed under the License is distributed on an "AS IS" BASIS,
13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 # See the License for the specific language governing permissions and
15 # limitations under the License.
21 """Generate Datastore Stats over Dev mode appserver's datastore."""
31 import datetime
32 import logging
34 from google.appengine.api import datastore
35 from google.appengine.api import datastore_admin
36 from google.appengine.api import datastore_types
37 from google.appengine.api import users
38 from google.appengine.ext.db import stats
# Number of old stat keys passed to a single datastore.Delete() call when
# the previous run's stat records are removed.
DELETE_BATCH_SIZE = 100

# Key under which the application-wide total is stored in the whole-app
# stats dictionary: (stat model class, key name, namespace).
_GLOBAL_KEY = (stats.GlobalStat, 'total_entity_usage', '')

# Maps the Python type of a decoded property value to a pair of names:
#   [0] the property type name used for the entity-size stat records,
#   [1] the property type name used for the built-in index stat records.
_PROPERTY_TYPE_TO_DSS_NAME = {
    unicode: ('String', 'STRING'),
    bool: ('Boolean', 'BOOLEAN'),
    long: ('Integer', 'INT64'),
    type(None): ('NULL', 'NULL'),
    float: ('Float', 'DOUBLE'),
    datastore_types.Key: ('Key', 'REFERENCE'),
    datastore_types.Blob: ('Blob', 'STRING'),
    datastore_types.EmbeddedEntity: ('EmbeddedEntity', 'STRING'),
    datastore_types.ByteString: ('ShortBlob', 'STRING'),
    datastore_types.Text: ('Text', 'STRING'),
    users.User: ('User', 'USER'),
    datastore_types.Category: ('Category', 'STRING'),
    datastore_types.Link: ('Link', 'STRING'),
    datastore_types.Email: ('Email', 'STRING'),
    datetime.datetime: ('Date/Time', 'INT64'),
    datastore_types.GeoPt: ('GeoPt', 'POINT'),
    datastore_types.IM: ('IM', 'STRING'),
    datastore_types.PhoneNumber: ('PhoneNumber', 'STRING'),
    datastore_types.PostalAddress: ('PostalAddress', 'STRING'),
    datastore_types.Rating: ('Rating', 'INT64'),
    datastore_types.BlobKey: ('BlobKey', 'STRING'),
}
class DatastoreStatsProcessor(object):
  """Generates datastore stats for an app's datastore entities.

  Typical use is `DatastoreStatsProcessor(app_id).Run().Report()`.

  Attributes:
    app_id: Application id resolved from the constructor argument.
    whole_app_stats: Dict of stat models that are stored in the empty
      namespace, keyed by (stat model class, key name, namespace).
    namespace_stats: Dict of per-namespace stat models, keyed the same way.
    found_non_empty_namespace: True once an entity has been seen in a
      namespace other than ''.
    old_stat_keys: Keys of stat entities found during the scan; they are
      deleted before the new stats are written.
    timestamp: UTC time stamped on every stat model created by this run.
    written: Number of stat models written by the last finalize step.
  """

  def __init__(self, _app=None):
    """Create an empty processor.

    Args:
      _app: Application id, passed to datastore_types.ResolveAppId().
    """
    self.app_id = datastore_types.ResolveAppId(_app)

    self.whole_app_stats = {}

    self.namespace_stats = {}
    self.found_non_empty_namespace = False

    self.old_stat_keys = []

    self.timestamp = datetime.datetime.utcnow()

    # Initialised here so that Report() works even when Run() has not been
    # called yet; __Finalize() resets it before counting.
    self.written = 0

  def __ScanAllNamespaces(self):
    """Scans all the namespaces and processes each namespace."""
    namespace_query = datastore.Query('__namespace__', _app=self.app_id)

    for namespace_entity in namespace_query.Run():
      name = namespace_entity.key().name()
      # A namespace entity without a key name is processed as namespace ''.
      if name is None:
        name = ''
      self.__ProcessNamespace(name)

  def __ProcessNamespace(self, namespace):
    """Process all the entities in a given namespace.

    Entities whose kind is listed in stats._DATASTORE_STATS_CLASSES_BY_KIND
    are existing stat records: their keys are remembered for deletion and
    they only contribute to the totals. Every other entity is processed as
    a user entity.

    Args:
      namespace: Name of the namespace to scan ('' for the default one).
    """
    all_query = datastore.Query(namespace=namespace, _app=self.app_id)

    for entity in all_query.Run():
      self.found_non_empty_namespace |= (namespace != '')
      proto = entity.ToPb()
      proto_size = len(proto.SerializeToString())

      key = entity.key()
      kind = key.kind()
      if kind in stats._DATASTORE_STATS_CLASSES_BY_KIND:
        stat_kind = stats._DATASTORE_STATS_CLASSES_BY_KIND[kind]
        self.old_stat_keys.append(key)
        self.__AggregateTotal(proto_size, key, proto, namespace, stat_kind)
      else:
        self.__ProcessUserEntity(proto_size, key, proto, namespace)

  def __GetPropertyIndexStat(self, namespace, kind_name,
                             entity_key_size, prop):
    """Return the size and count of indexes for a property of an EntityProto.

    Args:
      namespace: Namespace of the entity.
      kind_name: Kind of the entity.
      entity_key_size: Size in bytes attributed to the entity key.
      prop: Property protocol buffer; its serialized value is measured.

    Returns:
      A (size, count) tuple. The count is always 2.
    """
    property_index_size = (len(self.app_id) + len(kind_name) +
                           len(prop.value().SerializeToString()) +
                           len(namespace) + entity_key_size)

    # NOTE(review): the fixed count of 2 presumably stands for one ascending
    # and one descending index entry per property -- confirm.
    return (property_index_size, 2)

  def __GetTypeIndexStat(self, namespace, kind_name, entity_key_size):
    """Return the size and count of indexes by type of an EntityProto.

    Args:
      namespace: Namespace of the entity.
      kind_name: Kind of the entity.
      entity_key_size: Size in bytes attributed to the entity key.

    Returns:
      A (size, count) tuple. The count is always 1.
    """
    type_index_size = (len(self.app_id) + len(kind_name) + entity_key_size
                       + len(namespace))
    return (type_index_size, 1)

  def __GetBuiltinIndexStat(self, proto, namespace, kind_name,
                            entity_key_size):
    """Return the total size and count of built-in indexes of an entity.

    Sums the type index and the per-property indexes over both the regular
    and the raw property lists of the entity.

    Args:
      proto: EntityProto of the entity.
      namespace: Namespace of the entity.
      kind_name: Kind of the entity.
      entity_key_size: Size in bytes attributed to the entity key.

    Returns:
      A (size, count) tuple.
    """
    total_size, total_count = self.__GetTypeIndexStat(namespace, kind_name,
                                                      entity_key_size)
    for prop_list in (proto.property_list(), proto.raw_property_list()):
      for prop in prop_list:
        index_size, index_count = self.__GetPropertyIndexStat(
            namespace, kind_name, entity_key_size, prop)
        total_size += index_size
        total_count += index_count
    return (total_size, total_count)

  def __ProcessUserEntity(self, proto_size, key, proto, namespace):
    """Increment datastore stats for a non stats record.

    Args:
      proto_size: Serialized size of the entity in bytes.
      key: Key of the entity.
      proto: EntityProto of the entity.
      namespace: Namespace the entity was found in.
    """
    self.__AggregateTotal(proto_size, key, proto, namespace, None)

    kind_name = key.kind()

    # Unlike __AggregateTotal, the key size used here also counts the
    # namespace length.
    entity_key_size = (len(proto.key().app()) + len(namespace) +
                       len(proto.key().path().SerializeToString()) +
                       len(proto.entity_group().SerializeToString()))

    self.__AggregateCompositeIndices(proto, namespace, kind_name,
                                     entity_key_size)

    builtin_index_size, builtin_index_count = self.__GetBuiltinIndexStat(
        proto, namespace, kind_name, entity_key_size)

    self.__Increment(self.whole_app_stats, 1,
                     (stats.KindStat, kind_name, ''),
                     proto_size,
                     builtin_index_count=builtin_index_count,
                     builtin_index_size=builtin_index_size,
                     kind_name=kind_name)

    self.__Increment(self.namespace_stats, 1,
                     (stats.NamespaceKindStat, kind_name, namespace),
                     proto_size,
                     builtin_index_count=builtin_index_count,
                     builtin_index_size=builtin_index_size,
                     kind_name=kind_name)

    # Root and non-root entities are tracked by separate stat models.
    if key.parent() is None:
      whole_app_model = stats.KindRootEntityStat
      namespace_model = stats.NamespaceKindRootEntityStat
    else:
      whole_app_model = stats.KindNonRootEntityStat
      namespace_model = stats.NamespaceKindNonRootEntityStat

    self.__Increment(self.whole_app_stats, 1,
                     (whole_app_model, kind_name, ''),
                     proto_size,
                     kind_name=kind_name)

    self.__Increment(self.namespace_stats, 1,
                     (namespace_model, kind_name, namespace),
                     proto_size,
                     kind_name=kind_name)

    self.__ProcessProperties(
        kind_name,
        namespace,
        entity_key_size,
        (proto.property_list(), proto.raw_property_list()))

  def __ProcessProperties(self, kind_name, namespace, entity_key_size,
                          prop_lists):
    """Aggregate stats for every property in the given property lists.

    A property that cannot be decoded or aggregated is logged and skipped
    so that one bad property does not abort the whole scan.

    Args:
      kind_name: Kind of the entity owning the properties.
      namespace: Namespace of the entity.
      entity_key_size: Size in bytes attributed to the entity key.
      prop_lists: Iterable of property lists to process.
    """
    for prop_list in prop_lists:
      for prop in prop_list:
        try:
          value = datastore_types.FromPropertyPb(prop)
          self.__AggregateProperty(kind_name, namespace, entity_key_size,
                                   prop, value)
        except (AssertionError, AttributeError, TypeError, ValueError) as e:
          logging.error('Cannot process property %r, exception %s', prop, e)

  def __IncrementTypePair(self, stats_dict, stat_model, key_suffix, namespace,
                          property_type, index_property_type, size,
                          index_count, index_size, **kwds):
    """Record one property under its type name and its index type name.

    The entity bytes (and a count of 1) go to the stat keyed by
    property_type; the built-in index bytes and count go to the stat keyed
    by index_property_type, with a count and size of 0.

    Args:
      stats_dict: Dictionary holding the stat models to update.
      stat_model: db.Model class of the stat.
      key_suffix: Text appended to the type name to form the stat key name.
      namespace: Namespace component of the stat key.
      property_type: Type name used for the entity-size record.
      index_property_type: Type name used for the index record.
      size: Serialized size of the property in bytes.
      index_count: Number of built-in index entries of the property.
      index_size: Size in bytes of the built-in index entries.
      **kwds: Extra name value pairs set on newly created stat models.
    """
    self.__Increment(stats_dict, 1,
                     (stat_model, property_type + key_suffix, namespace),
                     size,
                     builtin_index_count=0,
                     builtin_index_size=0,
                     property_type=property_type, **kwds)

    self.__Increment(stats_dict, 0,
                     (stat_model, index_property_type + key_suffix,
                      namespace),
                     0,
                     builtin_index_count=index_count,
                     builtin_index_size=index_size,
                     property_type=index_property_type, **kwds)

  def __AggregateProperty(self, kind_name, namespace, entity_key_size,
                          prop, value):
    """Aggregate the stats of a single property.

    Args:
      kind_name: Kind of the entity owning the property.
      namespace: Namespace of the entity.
      entity_key_size: Size in bytes attributed to the entity key.
      prop: Property protocol buffer.
      value: Decoded value of the property; its Python type selects the
        property type names.

    Raises:
      KeyError: If type(value) is not in _PROPERTY_TYPE_TO_DSS_NAME.
    """
    property_name = prop.name()
    property_type, index_property_type = (
        _PROPERTY_TYPE_TO_DSS_NAME[type(value)])
    size = len(prop.SerializeToString())

    index_size, index_count = self.__GetPropertyIndexStat(namespace, kind_name,
                                                          entity_key_size, prop)

    # Stats by property type.
    self.__IncrementTypePair(self.whole_app_stats, stats.PropertyTypeStat,
                             '', '',
                             property_type, index_property_type, size,
                             index_count, index_size)

    self.__IncrementTypePair(self.namespace_stats,
                             stats.NamespacePropertyTypeStat,
                             '', namespace,
                             property_type, index_property_type, size,
                             index_count, index_size)

    # Stats by property type and kind.
    self.__IncrementTypePair(self.whole_app_stats, stats.KindPropertyTypeStat,
                             '_' + kind_name, '',
                             property_type, index_property_type, size,
                             index_count, index_size,
                             kind_name=kind_name)

    self.__IncrementTypePair(self.namespace_stats,
                             stats.NamespaceKindPropertyTypeStat,
                             '_' + kind_name, namespace,
                             property_type, index_property_type, size,
                             index_count, index_size,
                             kind_name=kind_name)

    # Stats by property name and kind.
    self.__Increment(self.whole_app_stats, 1,
                     (stats.KindPropertyNameStat,
                      property_name + '_' + kind_name, ''),
                     size,
                     builtin_index_count=index_count,
                     builtin_index_size=index_size,
                     property_name=property_name, kind_name=kind_name)

    self.__Increment(self.namespace_stats, 1,
                     (stats.NamespaceKindPropertyNameStat,
                      property_name + '_' + kind_name, namespace),
                     size,
                     builtin_index_count=index_count,
                     builtin_index_size=index_size,
                     property_name=property_name, kind_name=kind_name)

    # Stats by property type, property name and kind.
    name_kind_suffix = '_' + property_name + '_' + kind_name

    self.__IncrementTypePair(self.whole_app_stats,
                             stats.KindPropertyNamePropertyTypeStat,
                             name_kind_suffix, '',
                             property_type, index_property_type, size,
                             index_count, index_size,
                             property_name=property_name,
                             kind_name=kind_name)

    self.__IncrementTypePair(self.namespace_stats,
                             stats.NamespaceKindPropertyNamePropertyTypeStat,
                             name_kind_suffix, namespace,
                             property_type, index_property_type, size,
                             index_count, index_size,
                             property_name=property_name,
                             kind_name=kind_name)

  def __GetCompositeIndexStat(self, definition, proto, namespace, kind_name,
                              entity_key_size):
    """Get statistics of composite index for a index definition of an entity.

    Args:
      definition: Composite index definition; its property_list() names the
        indexed properties.
      proto: EntityProto of the entity; only property_list() is inspected.
      namespace: Namespace of the entity.
      kind_name: Kind of the entity.
      entity_key_size: Size in bytes attributed to the entity key.

    Returns:
      A (size, count) tuple, or (0, 0) when the entity lacks a value for at
      least one of the indexed properties.
    """
    property_list = proto.property_list()
    property_count = []
    property_size = []
    # Product of the number of values of each indexed property.
    index_count = 1
    for indexed_prop in definition.property_list():
      name = indexed_prop.name()
      count = 0
      prop_size = 0
      for prop in property_list:
        if prop.name() == name:
          count += 1
          prop_size += len(prop.SerializeToString())

      property_count.append(count)
      property_size.append(prop_size)
      index_count *= count

    if index_count == 0:
      return (0, 0)

    # index_count is the product of all counts, so each floor division
    # below is exact.
    index_only_size = 0
    for prop_size, count in zip(property_size, property_count):
      index_only_size += prop_size * (index_count // count)

    index_size = (index_count * (entity_key_size + len(kind_name) +
                                 len(self.app_id) + len(namespace)) +
                  index_only_size * 2)

    return (index_size, index_count)

  def __AggregateCompositeIndices(self, proto, namespace, kind_name,
                                  entity_key_size):
    """Aggregate statistics of composite indexes for an entity.

    Args:
      proto: EntityProto of the entity.
      namespace: Namespace of the entity.
      kind_name: Kind of the entity.
      entity_key_size: Size in bytes attributed to the entity key.
    """
    composite_indices = datastore_admin.GetIndices(self.app_id)
    for index in composite_indices:
      definition = index.definition()
      if kind_name != definition.entity_type():
        continue

      index_size, index_count = self.__GetCompositeIndexStat(definition, proto,
                                                             namespace,
                                                             kind_name,
                                                             entity_key_size)

      if index_count == 0:
        continue

      # The stat for the empty namespace is keyed by the id 1 instead of ''.
      name_id = namespace
      if not name_id:
        name_id = 1

      # Totals for the whole application and for the namespace.
      self.__Increment(self.whole_app_stats, 0, _GLOBAL_KEY, 0,
                       composite_index_count=index_count,
                       composite_index_size=index_size)

      self.__Increment(self.whole_app_stats, 0,
                       (stats.NamespaceStat, name_id, ''), 0,
                       composite_index_count=index_count,
                       composite_index_size=index_size,
                       subject_namespace=namespace)

      self.__Increment(self.namespace_stats, 0,
                       (stats.NamespaceGlobalStat, 'total_entity_usage',
                        namespace), 0,
                       composite_index_count=index_count,
                       composite_index_size=index_size)

      # Totals per kind.
      self.__Increment(self.whole_app_stats, 0,
                       (stats.KindStat, kind_name, ''), 0,
                       composite_index_count=index_count,
                       composite_index_size=index_size,
                       kind_name=kind_name)

      self.__Increment(self.namespace_stats, 0,
                       (stats.NamespaceKindStat, kind_name, namespace), 0,
                       composite_index_count=index_count,
                       composite_index_size=index_size,
                       kind_name=kind_name)

      # Totals per composite index; here the index entries are passed as
      # the count and the index bytes as the size.
      index_id = index.id()
      self.__Increment(self.whole_app_stats, index_count,
                       (stats.KindCompositeIndexStat,
                        kind_name + '_%s' % index_id, ''), index_size,
                       kind_name=kind_name, index_id=index_id)

      self.__Increment(self.namespace_stats, index_count,
                       (stats.NamespaceKindCompositeIndexStat,
                        kind_name + '_%s' % index_id, namespace), index_size,
                       kind_name=kind_name, index_id=index_id)

  def __AggregateTotal(self, size, key, proto, namespace, stat_kind):
    """Aggregate total datastore stats.

    Args:
      size: Serialized size of the entity in bytes.
      key: Key of the entity.
      proto: EntityProto of the entity.
      namespace: Namespace the entity was found in.
      stat_kind: Stat model class when the entity is itself a stat record,
        None for a user entity.
    """
    kind_name = key.kind()

    entity_key_size = (len(proto.key().app()) +
                       len(proto.key().path().SerializeToString()) +
                       len(proto.entity_group().SerializeToString()))

    builtin_index_size, builtin_index_count = self.__GetBuiltinIndexStat(
        proto, namespace, kind_name, entity_key_size)

    # An old record of the very stat being updated adds its bytes but is
    # not counted as an entity. Once count drops to 0 it stays 0 for the
    # increments that follow.
    if stat_kind == stats.GlobalStat:
      count = 0
    else:
      count = 1

    self.__Increment(self.whole_app_stats, count, _GLOBAL_KEY, size,
                     builtin_index_count=builtin_index_count,
                     builtin_index_size=builtin_index_size)

    # The stat for the empty namespace is keyed by the id 1 instead of ''.
    name_id = namespace
    if not name_id:
      name_id = 1

    if (stat_kind == stats.NamespaceStat) and (namespace == ''):
      count = 0

    self.__Increment(self.whole_app_stats, count,
                     (stats.NamespaceStat, name_id, ''),
                     size,
                     builtin_index_count=builtin_index_count,
                     builtin_index_size=builtin_index_size,
                     subject_namespace=namespace)

    if stat_kind == stats.NamespaceGlobalStat:
      count = 0

    self.__Increment(
        self.namespace_stats, count,
        (stats.NamespaceGlobalStat, 'total_entity_usage', namespace), size,
        builtin_index_count=builtin_index_count,
        builtin_index_size=builtin_index_size)

  def __Increment(self, stats_dict, count, stat_key, size,
                  builtin_index_count=0, builtin_index_size=0,
                  composite_index_count=0, composite_index_size=0, **kwds):
    """Increment stats for a particular kind.

    Args:
      stats_dict: The dictionary where the entities are held.
        The entities are keyed by stat_key. e.g. The
        __Stat_Total__ entity will be found in stats_dict[_GLOBAL_KEY].
      count: The amount to increment the datastore stat by.
      stat_key: A tuple of (db.Model of the stat, key value, namespace).
      size: The "bytes" to increment the size by.
      builtin_index_count: The count of builtin index to add in to a stat.
      builtin_index_size: The bytes of builtin index to add in to a stat.
      composite_index_count: The count of composite index to add in to a stat.
      composite_index_size: The bytes of composite index to add in to a stat.
      kwds: Name value pairs that are set on the created entities.
    """
    if stat_key not in stats_dict:
      # First sighting of this stat: create the model and seed its values.
      stat_model = stat_key[0](
          key=datastore_types.Key.from_path(stat_key[0].STORED_KIND_NAME,
                                            stat_key[1],
                                            namespace=stat_key[2],
                                            _app=self.app_id),
          _app=self.app_id)
      stats_dict[stat_key] = stat_model
      for field, value in kwds.iteritems():
        setattr(stat_model, field, value)
      stat_model.count = count
      # Byte and index fields are only assigned when non-zero; the index
      # counts are only recorded together with a non-zero index size.
      if size:
        stat_model.entity_bytes = size
      if builtin_index_size:
        stat_model.builtin_index_bytes = builtin_index_size
        stat_model.builtin_index_count = builtin_index_count
      if composite_index_size:
        stat_model.composite_index_bytes = composite_index_size
        stat_model.composite_index_count = composite_index_count
      stat_model.bytes = size + builtin_index_size + composite_index_size
      stat_model.timestamp = self.timestamp
    else:
      stat_model = stats_dict[stat_key]
      stat_model.count += count
      if size:
        stat_model.entity_bytes += size
      if builtin_index_size:
        stat_model.builtin_index_bytes += builtin_index_size
        stat_model.builtin_index_count += builtin_index_count
      if composite_index_size:
        stat_model.composite_index_bytes += composite_index_size
        stat_model.composite_index_count += composite_index_count
      stat_model.bytes += size + builtin_index_size + composite_index_size

  def __Finalize(self):
    """Finishes processing, deletes all old stats and writes new ones."""
    for i in range(0, len(self.old_stat_keys), DELETE_BATCH_SIZE):
      datastore.Delete(self.old_stat_keys[i:i+DELETE_BATCH_SIZE])

    self.written = 0

    # GlobalStat and NamespaceStat records are skipped when their count is
    # zero; every other whole-app stat is always written.
    for stat in self.whole_app_stats.itervalues():
      if stat.count or not isinstance(stat, (stats.GlobalStat,
                                             stats.NamespaceStat)):
        stat.put()
        self.written += 1

    # Per-namespace stats are only written when some entity was found
    # outside the default namespace.
    if self.found_non_empty_namespace:
      for stat in self.namespace_stats.itervalues():
        if stat.count or not isinstance(stat, stats.NamespaceGlobalStat):
          stat.put()
          self.written += 1

  def Run(self):
    """Scans the datastore, computes new stats and writes them.

    Returns:
      This processor, so that calls can be chained.
    """
    self.__ScanAllNamespaces()
    self.__Finalize()
    return self

  def Report(self):
    """Produce a small report about the result.

    Returns:
      A one-line, human readable summary of the totals held by the global
      stat and of the number of stat records written.
    """
    stat = self.whole_app_stats.get(_GLOBAL_KEY, None)
    entity_size = 0
    entity_count = 0
    builtin_index_size = 0
    builtin_index_count = 0
    composite_index_size = 0
    composite_index_count = 0
    if stat:
      entity_size = stat.entity_bytes
      entity_count = stat.count
      builtin_index_size = stat.builtin_index_bytes
      builtin_index_count = stat.builtin_index_count
      composite_index_size = stat.composite_index_bytes
      composite_index_count = stat.composite_index_count

      # NOTE(review): a zero count on an existing global stat is reported
      # as 1 -- the reason is not visible in this file; confirm intent.
      if not entity_count:
        entity_count = 1

    return ('Scanned %d entities of total %d bytes, %d index entries of total '
            '%d bytes and %d composite index entries of total %d bytes. '
            'Inserted %d new records.'
            % (entity_count, entity_size, builtin_index_count,
               builtin_index_size, composite_index_count, composite_index_size,
               self.written))