components/policy/tools/syntax_check_policy_template_json.py

   1 #!/usr/bin/env python
   2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
   3 # Use of this source code is governed by a BSD-style license that can be
   4 # found in the LICENSE file.
   5
   6 '''
   7 Checks a policy_templates.json file for conformity to its syntax specification.
   8 '''
   9
  10 import json
  11 import optparse
  12 import os
  13 import re
  14 import sys
  15
  16
  17 LEADING_WHITESPACE = re.compile('^([ \t]*)')
  18 TRAILING_WHITESPACE = re.compile('.*?([ \t]+)$')
  19 # Matches all non-empty strings that contain no whitespaces.
  20 NO_WHITESPACE = re.compile('[^\s]+$')
  21
  22 # Convert a 'type' to the schema types it may be converted to.
  23 # The 'dict' type represents structured JSON data, and can be converted
  24 # to an 'object' or an 'array'.
  25 TYPE_TO_SCHEMA = {
  26   'int': [ 'integer' ],
  27   'list': [ 'array' ],
  28   'dict': [ 'object', 'array' ],
  29   'main': [ 'boolean' ],
  30   'string': [ 'string' ],
  31   'int-enum': [ 'integer' ],
  32   'string-enum': [ 'string' ],
  33   'string-enum-list': [ 'array' ],
  34   'external': [ 'object' ],
  35 }
  36
  37 # List of boolean policies that have been introduced with negative polarity in
  38 # the past and should not trigger the negative polarity check.
  39 LEGACY_INVERTED_POLARITY_WHITELIST = [
  40     'DeveloperToolsDisabled',
  41     'DeviceAutoUpdateDisabled',
  42     'Disable3DAPIs',
  43     'DisableAuthNegotiateCnameLookup',
  44     'DisablePluginFinder',
  45     'DisablePrintPreview',
  46     'DisableSafeBrowsingProceedAnyway',
  47     'DisableScreenshots',
  48     'DisableSpdy',
  49     'DisableSSLRecordSplitting',
  50     'DriveDisabled',
  51     'DriveDisabledOverCellular',
  52     'ExternalStorageDisabled',
  53     'SavingBrowserHistoryDisabled',
  54     'SyncDisabled',
  55 ]
  56
  57 class PolicyTemplateChecker(object):
  58
  59   def __init__(self):
  60     self.error_count = 0
  61     self.warning_count = 0
  62     self.num_policies = 0
  63     self.num_groups = 0
  64     self.num_policies_in_groups = 0
  65     self.options = None
  66     self.features = []
  67
  68   def _Error(self, message, parent_element=None, identifier=None,
  69              offending_snippet=None):
  70     self.error_count += 1
  71     error = ''
  72     if identifier is not None and parent_element is not None:
  73       error += 'In %s %s: ' % (parent_element, identifier)
  74     print error + 'Error: ' + message
  75     if offending_snippet is not None:
  76       print '  Offending:', json.dumps(offending_snippet, indent=2)
  77
  78   def _CheckContains(self, container, key, value_type,
  79                      optional=False,
  80                      parent_element='policy',
  81                      container_name=None,
  82                      identifier=None,
  83                      offending='__CONTAINER__',
  84                      regexp_check=None):
  85     '''
  86     Checks |container| for presence of |key| with value of type |value_type|.
  87     If |value_type| is string and |regexp_check| is specified, then an error is
  88     reported when the value does not match the regular expression object.
  89
  90     |value_type| can also be a list, if more than one type is supported.
  91
  92     The other parameters are needed to generate, if applicable, an appropriate
  93     human-readable error message of the following form:
  94
  95     In |parent_element| |identifier|:
  96       (if the key is not present):
  97       Error: |container_name| must have a |value_type| named |key|.
  98       Offending snippet: |offending| (if specified; defaults to |container|)
  99       (if the value does not have the required type):
 100       Error: Value of |key| must be a |value_type|.
 101       Offending snippet: |container[key]|
 102
 103     Returns: |container[key]| if the key is present, None otherwise.
 104     '''
 105     if identifier is None:
 106       try:
 107         identifier = container.get('name')
 108       except:
 109         self._Error('Cannot access container name of "%s".' % container_name)
 110         return None
 111     if container_name is None:
 112       container_name = parent_element
 113     if offending == '__CONTAINER__':
 114       offending = container
 115     if key not in container:
 116       if optional:
 117         return
 118       else:
 119         self._Error('%s must have a %s "%s".' %
 120                     (container_name.title(), value_type.__name__, key),
 121                     container_name, identifier, offending)
 122       return None
 123     value = container[key]
 124     value_types = value_type if isinstance(value_type, list) else [ value_type ]
 125     if not any(isinstance(value, type) for type in value_types):
 126       self._Error('Value of "%s" must be one of [ %s ].' %
 127                   (key, ', '.join([type.__name__ for type in value_types])),
 128                   container_name, identifier, value)
 129     if str in value_types and regexp_check and not regexp_check.match(value):
 130       self._Error('Value of "%s" must match "%s".' %
 131                   (key, regexp_check.pattern),
 132                   container_name, identifier, value)
 133     return value
 134
 135   def _AddPolicyID(self, id, policy_ids, policy):
 136     '''
 137     Adds |id| to |policy_ids|. Generates an error message if the
 138     |id| exists already; |policy| is needed for this message.
 139     '''
 140     if id in policy_ids:
 141       self._Error('Duplicate id', 'policy', policy.get('name'),
 142                   id)
 143     else:
 144       policy_ids.add(id)
 145
 146   def _CheckPolicyIDs(self, policy_ids):
 147     '''
 148     Checks a set of policy_ids to make sure it contains a continuous range
 149     of entries (i.e. no holes).
 150     Holes would not be a technical problem, but we want to ensure that nobody
 151     accidentally omits IDs.
 152     '''
 153     for i in range(len(policy_ids)):
 154       if (i + 1) not in policy_ids:
 155         self._Error('No policy with id: %s' % (i + 1))
 156
 157   def _CheckPolicySchema(self, policy, policy_type):
 158     '''Checks that the 'schema' field matches the 'type' field.'''
 159     self._CheckContains(policy, 'schema', dict)
 160     if isinstance(policy.get('schema'), dict):
 161       self._CheckContains(policy['schema'], 'type', str)
 162       schema_type = policy['schema'].get('type')
 163       if schema_type not in TYPE_TO_SCHEMA[policy_type]:
 164         self._Error('Schema type must match the existing type for policy %s' %
 165                     policy.get('name'))
 166
 167       # Checks that boolean policies are not negated (which makes them harder to
 168       # reason about).
 169       if (schema_type == 'boolean' and
 170           'disable' in policy.get('name').lower() and
 171           policy.get('name') not in LEGACY_INVERTED_POLARITY_WHITELIST):
 172         self._Error(('Boolean policy %s uses negative polarity, please make ' +
 173                      'new boolean policies follow the XYZEnabled pattern. ' +
 174                      'See also http://crbug.com/85687') % policy.get('name'))
 175
 176
 177   def _CheckPolicy(self, policy, is_in_group, policy_ids):
 178     if not isinstance(policy, dict):
 179       self._Error('Each policy must be a dictionary.', 'policy', None, policy)
 180       return
 181
 182     # There should not be any unknown keys in |policy|.
 183     for key in policy:
 184       if key not in ('name', 'type', 'caption', 'desc', 'device_only',
 185                      'supported_on', 'label', 'policies', 'items',
 186                      'example_value', 'features', 'deprecated', 'future',
 187                      'id', 'schema', 'max_size'):
 188         self.warning_count += 1
 189         print ('In policy %s: Warning: Unknown key: %s' %
 190                (policy.get('name'), key))
 191
 192     # Each policy must have a name.
 193     self._CheckContains(policy, 'name', str, regexp_check=NO_WHITESPACE)
 194
 195     # Each policy must have a type.
 196     policy_types = ('group', 'main', 'string', 'int', 'list', 'int-enum',
 197                     'string-enum', 'string-enum-list', 'dict', 'external')
 198     policy_type = self._CheckContains(policy, 'type', str)
 199     if policy_type not in policy_types:
 200       self._Error('Policy type must be one of: ' + ', '.join(policy_types),
 201                   'policy', policy.get('name'), policy_type)
 202       return  # Can't continue for unsupported type.
 203
 204     # Each policy must have a caption message.
 205     self._CheckContains(policy, 'caption', str)
 206
 207     # Each policy must have a description message.
 208     self._CheckContains(policy, 'desc', str)
 209
 210     # If 'label' is present, it must be a string.
 211     self._CheckContains(policy, 'label', str, True)
 212
 213     # If 'deprecated' is present, it must be a bool.
 214     self._CheckContains(policy, 'deprecated', bool, True)
 215
 216     # If 'future' is present, it must be a bool.
 217     self._CheckContains(policy, 'future', bool, True)
 218
 219     if policy_type == 'group':
 220       # Groups must not be nested.
 221       if is_in_group:
 222         self._Error('Policy groups must not be nested.', 'policy', policy)
 223
 224       # Each policy group must have a list of policies.
 225       policies = self._CheckContains(policy, 'policies', list)
 226
 227       # Check sub-policies.
 228       if policies is not None:
 229         for nested_policy in policies:
 230           self._CheckPolicy(nested_policy, True, policy_ids)
 231
 232       # Groups must not have an |id|.
 233       if 'id' in policy:
 234         self._Error('Policies of type "group" must not have an "id" field.',
 235                     'policy', policy)
 236
 237       # Statistics.
 238       self.num_groups += 1
 239
 240     else:  # policy_type != group
 241       # Each policy must have a protobuf ID.
 242       id = self._CheckContains(policy, 'id', int)
 243       self._AddPolicyID(id, policy_ids, policy)
 244
 245       # 'schema' is the new 'type'.
 246       # TODO(joaodasilva): remove the 'type' checks once 'schema' is used
 247       # everywhere.
 248       self._CheckPolicySchema(policy, policy_type)
 249
 250       # Each policy must have a supported_on list.
 251       supported_on = self._CheckContains(policy, 'supported_on', list)
 252       if supported_on is not None:
 253         for s in supported_on:
 254           if not isinstance(s, str):
 255             self._Error('Entries in "supported_on" must be strings.', 'policy',
 256                         policy, supported_on)
 257
 258       # Each policy must have a 'features' dict.
 259       features = self._CheckContains(policy, 'features', dict)
 260
 261       # All the features must have a documenting message.
 262       if features:
 263         for feature in features:
 264           if not feature in self.features:
 265             self._Error('Unknown feature "%s". Known features must have a '
 266                         'documentation string in the messages dictionary.' %
 267                         feature, 'policy', policy.get('name', policy))
 268
 269       # All user policies must have a per_profile feature flag.
 270       if (not policy.get('device_only', False) and
 271           not policy.get('deprecated', False) and
 272           not filter(re.compile('^chrome_frame:.*').match, supported_on)):
 273         self._CheckContains(features, 'per_profile', bool,
 274                             container_name='features',
 275                             identifier=policy.get('name'))
 276
 277       # All policies must declare whether they allow changes at runtime.
 278       self._CheckContains(features, 'dynamic_refresh', bool,
 279                           container_name='features',
 280                           identifier=policy.get('name'))
 281
 282       # Each policy must have an 'example_value' of appropriate type.
 283       if policy_type == 'main':
 284         value_type = item_type = bool
 285       elif policy_type in ('string', 'string-enum'):
 286         value_type = item_type = str
 287       elif policy_type in ('int', 'int-enum'):
 288         value_type = item_type = int
 289       elif policy_type in ('list', 'string-enum-list'):
 290         value_type = list
 291         item_type = str
 292       elif policy_type == 'external':
 293         value_type = item_type = dict
 294       elif policy_type == 'dict':
 295         value_type = item_type = [ dict, list ]
 296       else:
 297         raise NotImplementedError('Unimplemented policy type: %s' % policy_type)
 298       self._CheckContains(policy, 'example_value', value_type)
 299
 300       # Statistics.
 301       self.num_policies += 1
 302       if is_in_group:
 303         self.num_policies_in_groups += 1
 304
 305     if policy_type in ('int-enum', 'string-enum', 'string-enum-list'):
 306       # Enums must contain a list of items.
 307       items = self._CheckContains(policy, 'items', list)
 308       if items is not None:
 309         if len(items) < 1:
 310           self._Error('"items" must not be empty.', 'policy', policy, items)
 311         for item in items:
 312           # Each item must have a name.
 313           # Note: |policy.get('name')| is used instead of |policy['name']|
 314           # because it returns None rather than failing when no key called
 315           # 'name' exists.
 316           self._CheckContains(item, 'name', str, container_name='item',
 317                               identifier=policy.get('name'),
 318                               regexp_check=NO_WHITESPACE)
 319
 320           # Each item must have a value of the correct type.
 321           self._CheckContains(item, 'value', item_type, container_name='item',
 322                               identifier=policy.get('name'))
 323
 324           # Each item must have a caption.
 325           self._CheckContains(item, 'caption', str, container_name='item',
 326                               identifier=policy.get('name'))
 327
 328     if policy_type == 'external':
 329       # Each policy referencing external data must specify a maximum data size.
 330       self._CheckContains(policy, 'max_size', int)
 331
 332   def _CheckMessage(self, key, value):
 333     # |key| must be a string, |value| a dict.
 334     if not isinstance(key, str):
 335       self._Error('Each message key must be a string.', 'message', key, key)
 336       return
 337
 338     if not isinstance(value, dict):
 339       self._Error('Each message must be a dictionary.', 'message', key, value)
 340       return
 341
 342     # Each message must have a desc.
 343     self._CheckContains(value, 'desc', str, parent_element='message',
 344                         identifier=key)
 345
 346     # Each message must have a text.
 347     self._CheckContains(value, 'text', str, parent_element='message',
 348                         identifier=key)
 349
 350     # There should not be any unknown keys in |value|.
 351     for vkey in value:
 352       if vkey not in ('desc', 'text'):
 353         self.warning_count += 1
 354         print 'In message %s: Warning: Unknown key: %s' % (key, vkey)
 355
 356   def _LeadingWhitespace(self, line):
 357     match = LEADING_WHITESPACE.match(line)
 358     if match:
 359       return match.group(1)
 360     return ''
 361
 362   def _TrailingWhitespace(self, line):
 363     match = TRAILING_WHITESPACE.match(line)
 364     if match:
 365       return match.group(1)
 366     return ''
 367
 368   def _LineError(self, message, line_number):
 369     self.error_count += 1
 370     print 'In line %d: Error: %s' % (line_number, message)
 371
 372   def _LineWarning(self, message, line_number):
 373     self.warning_count += 1
 374     print ('In line %d: Warning: Automatically fixing formatting: %s'
 375            % (line_number, message))
 376
 377   def _CheckFormat(self, filename):
 378     if self.options.fix:
 379       fixed_lines = []
 380     with open(filename) as f:
 381       indent = 0
 382       line_number = 0
 383       for line in f:
 384         line_number += 1
 385         line = line.rstrip('\n')
 386         # Check for trailing whitespace.
 387         trailing_whitespace = self._TrailingWhitespace(line)
 388         if len(trailing_whitespace) > 0:
 389           if self.options.fix:
 390             line = line.rstrip()
 391             self._LineWarning('Trailing whitespace.', line_number)
 392           else:
 393             self._LineError('Trailing whitespace.', line_number)
 394         if self.options.fix:
 395           if len(line) == 0:
 396             fixed_lines += ['\n']
 397             continue
 398         else:
 399           if line == trailing_whitespace:
 400             # This also catches the case of an empty line.
 401             continue
 402         # Check for correct amount of leading whitespace.
 403         leading_whitespace = self._LeadingWhitespace(line)
 404         if leading_whitespace.count('\t') > 0:
 405           if self.options.fix:
 406             leading_whitespace = leading_whitespace.replace('\t', '  ')
 407             line = leading_whitespace + line.lstrip()
 408             self._LineWarning('Tab character found.', line_number)
 409           else:
 410             self._LineError('Tab character found.', line_number)
 411         if line[len(leading_whitespace)] in (']', '}'):
 412           indent -= 2
 413         if line[0] != '#':  # Ignore 0-indented comments.
 414           if len(leading_whitespace) != indent:
 415             if self.options.fix:
 416               line = ' ' * indent + line.lstrip()
 417               self._LineWarning('Indentation should be ' + str(indent) +
 418                                 ' spaces.', line_number)
 419             else:
 420               self._LineError('Bad indentation. Should be ' + str(indent) +
 421                               ' spaces.', line_number)
 422         if line[-1] in ('[', '{'):
 423           indent += 2
 424         if self.options.fix:
 425           fixed_lines.append(line + '\n')
 426
 427     # If --fix is specified: backup the file (deleting any existing backup),
 428     # then write the fixed version with the old filename.
 429     if self.options.fix:
 430       if self.options.backup:
 431         backupfilename = filename + '.bak'
 432         if os.path.exists(backupfilename):
 433           os.remove(backupfilename)
 434         os.rename(filename, backupfilename)
 435       with open(filename, 'w') as f:
 436         f.writelines(fixed_lines)
 437
 438   def Main(self, filename, options):
 439     try:
 440       with open(filename) as f:
 441         data = eval(f.read())
 442     except:
 443       import traceback
 444       traceback.print_exc(file=sys.stdout)
 445       self._Error('Invalid Python/JSON syntax.')
 446       return 1
 447     if data == None:
 448       self._Error('Invalid Python/JSON syntax.')
 449       return 1
 450     self.options = options
 451
 452     # First part: check JSON structure.
 453
 454     # Check (non-policy-specific) message definitions.
 455     messages = self._CheckContains(data, 'messages', dict,
 456                                    parent_element=None,
 457                                    container_name='The root element',
 458                                    offending=None)
 459     if messages is not None:
 460       for message in messages:
 461         self._CheckMessage(message, messages[message])
 462         if message.startswith('doc_feature_'):
 463           self.features.append(message[12:])
 464
 465     # Check policy definitions.
 466     policy_definitions = self._CheckContains(data, 'policy_definitions', list,
 467                                              parent_element=None,
 468                                              container_name='The root element',
 469                                              offending=None)
 470     if policy_definitions is not None:
 471       policy_ids = set()
 472       for policy in policy_definitions:
 473         self._CheckPolicy(policy, False, policy_ids)
 474       self._CheckPolicyIDs(policy_ids)
 475
 476     # Second part: check formatting.
 477     self._CheckFormat(filename)
 478
 479     # Third part: summary and exit.
 480     print ('Finished checking %s. %d errors, %d warnings.' %
 481         (filename, self.error_count, self.warning_count))
 482     if self.options.stats:
 483       if self.num_groups > 0:
 484         print ('%d policies, %d of those in %d groups (containing on '
 485                'average %.1f policies).' %
 486                (self.num_policies, self.num_policies_in_groups, self.num_groups,
 487                  (1.0 * self.num_policies_in_groups / self.num_groups)))
 488       else:
 489         print self.num_policies, 'policies, 0 policy groups.'
 490     if self.error_count > 0:
 491       return 1
 492     return 0
 493
 494   def Run(self, argv, filename=None):
 495     parser = optparse.OptionParser(
 496         usage='usage: %prog [options] filename',
 497         description='Syntax check a policy_templates.json file.')
 498     parser.add_option('--fix', action='store_true',
 499                       help='Automatically fix formatting.')
 500     parser.add_option('--backup', action='store_true',
 501                       help='Create backup of original file (before fixing).')
 502     parser.add_option('--stats', action='store_true',
 503                       help='Generate statistics.')
 504     (options, args) = parser.parse_args(argv)
 505     if filename is None:
 506       if len(args) != 2:
 507         parser.print_help()
 508         sys.exit(1)
 509       filename = args[1]
 510     return self.Main(filename, options)
 511
 512
 513 if __name__ == '__main__':
 514   sys.exit(PolicyTemplateChecker().Run(sys.argv))