Bug 1613556 [wpt PR 21619] - Revert "Major new manifest version (v8): path trie editi...
[gecko.git] / testing / web-platform / tests / tools / manifest / manifest.py
blob bfac309c86a7b5c04ad2d25ee79c82c79c16a29a
1 import json
2 import os
3 from collections import MutableMapping
4 from six import iteritems, iterkeys, itervalues, string_types, binary_type, text_type
6 from . import vcs
7 from .item import (ConformanceCheckerTest, ManifestItem, ManualTest, RefTest, SupportFile,
8 TestharnessTest, VisualTest, WebDriverSpecTest, CrashTest)
9 from .log import get_logger
10 from .sourcefile import SourceFile
11 from .utils import from_os_path, to_os_path
# False at runtime, so the guarded imports below are never executed by the
# interpreter; they exist only for the ``# type:`` comments in this module.
MYPY = False
if MYPY:
    # MYPY is set to True when run under Mypy.
    from logging import Logger
    from typing import (Any, Container, Dict, IO, Iterable, Iterator, List,
                        Optional, Set, Text, Tuple, Type, Union)
# Use ujson for parsing when it is installed, otherwise fall back to the
# standard library json module.
try:
    import ujson
except ImportError:
    fast_json = json  # type: ignore
else:
    fast_json = ujson
# Manifest format version: written by Manifest.to_json and required to match
# exactly by Manifest.from_json.
CURRENT_VERSION = 7
class ManifestError(Exception):
    """Base class for errors raised while handling manifest data."""
class ManifestVersionMismatch(ManifestError):
    """Raised when a manifest's "version" differs from CURRENT_VERSION."""
# Maps each manifest item-type name to the ManifestItem subclass that
# represents items of that type.
item_classes = {
    "testharness": TestharnessTest,
    "reftest": RefTest,
    "crashtest": CrashTest,
    "manual": ManualTest,
    "wdspec": WebDriverSpecTest,
    "conformancechecker": ConformanceCheckerTest,
    "visual": VisualTest,
    "support": SupportFile,
}  # type: Dict[str, Type[ManifestItem]]
# Base class for TypeData: the plain ABC at runtime, the parametrised form
# only when type checking.
if not MYPY:
    TypeDataType = MutableMapping
else:
    TypeDataType = MutableMapping[Text, Set[ManifestItem]]
class TypeData(TypeDataType):
    """Dict-like object containing the TestItems for each test type.

    Loading an actual Item class for each test is unnecessarily slow, so
    this class allows lazy-loading of the test items. When the manifest is
    loaded we store the raw json corresponding to the test type, and only
    create an Item subclass when the test is accessed. In order to remain
    API-compatible with consumers that depend on getting an Item from
    iteration, we do eagerly load all items when iterating over the class.

    Keys are OS-style paths; ``json_data`` is keyed by the ``from_os_path``
    form of the same paths. ``json_data`` becomes ``None`` once everything
    has been loaded by ``load_all``.
    """

    def __init__(self, manifest, type_cls):
        # type: (Manifest, Type[ManifestItem]) -> None
        """Create an empty container for items of class ``type_cls``."""
        self.manifest = manifest
        self.type_cls = type_cls
        # Raw, not-yet-instantiated entries; None after load_all().
        self.json_data = {}  # type: Optional[Dict[Text, List[Any]]]
        self.tests_root = None  # type: Optional[str]
        # Already-instantiated items, keyed by OS path.
        self.data = {}  # type: Dict[Text, Set[ManifestItem]]

    def _build_items(self, path, tests):
        # type: (Text, Iterable[Any]) -> Set[ManifestItem]
        """Instantiate the items described by the raw entries of one path."""
        from_json = self.type_cls.from_json
        return {from_json(self.manifest, path, test) for test in tests}

    def __getitem__(self, key):
        # type: (Text) -> Set[ManifestItem]
        """Return the items for ``key``, loading them on first access.

        Raises KeyError when the path is in neither the loaded nor the raw
        data.
        """
        if key not in self.data and self.json_data is not None:
            self.load(key)
        return self.data[key]

    def __nonzero__(self):
        # type: () -> bool
        """True if any loaded or raw entries are held."""
        return bool(self.data) or bool(self.json_data)

    # Python 3 spelling of the truthiness hook.
    __bool__ = __nonzero__

    def __len__(self):
        # type: () -> int
        """Number of loaded paths plus number of still-raw paths."""
        rv = len(self.data)
        if self.json_data is not None:
            rv += len(self.json_data)
        return rv

    def __delitem__(self, key):
        # type: (Text) -> None
        """Remove ``key`` from the loaded data, else from the raw data.

        Raises KeyError if it is present in neither.
        """
        if key in self.data:
            del self.data[key]
        elif self.json_data is not None:
            del self.json_data[from_os_path(key)]
        else:
            raise KeyError(key)

    def __setitem__(self, key, value):
        # type: (Text, Set[ManifestItem]) -> None
        """Store ``value`` for ``key``, discarding any raw entry for it."""
        if self.json_data is not None:
            self.json_data.pop(from_os_path(key), None)
        self.data[key] = value

    def __contains__(self, key):
        # type: (Any) -> bool
        """Membership test; instantiates every remaining raw entry first."""
        self.load_all()
        return key in self.data

    def __iter__(self):
        # type: () -> Iterator[Text]
        """Iterate over all paths; instantiates every raw entry first."""
        self.load_all()
        return iter(self.data)

    def itervalues(self):
        # type: () -> Iterator[Set[ManifestItem]]
        """Iterate over the item sets; instantiates every raw entry first."""
        self.load_all()
        return itervalues(self.data)

    def iteritems(self):
        # type: () -> Iterator[Tuple[Text, Set[ManifestItem]]]
        """Iterate over (path, items); instantiates every raw entry first."""
        self.load_all()
        return iteritems(self.data)

    def values(self):
        # type: () -> List[Set[ManifestItem]]
        """List form of itervalues()."""
        return list(self.itervalues())

    def items(self):
        # type: () -> List[Tuple[Text, Set[ManifestItem]]]
        """List form of iteritems()."""
        return list(self.iteritems())

    def load(self, key):
        # type: (Text) -> None
        """Load a specific Item given a path.

        Does nothing when there is no raw entry for ``key``, so a lookup of
        an unknown path neither creates an empty entry nor overwrites items
        that are already loaded. Raises ValueError if all raw data has
        already been consumed by load_all().
        """
        if self.json_data is None:
            raise ValueError
        path = from_os_path(key)
        if path not in self.json_data:
            return
        self.data[key] = self._build_items(path, self.json_data[path])
        # Drop the raw entry only once the items were built successfully.
        del self.json_data[path]

    def load_all(self):
        # type: () -> None
        """Load all test items in this class"""
        if self.json_data is None:
            return
        for path, tests in iteritems(self.json_data):
            key = to_os_path(path)
            # An already-loaded entry takes precedence over the raw one.
            if key not in self.data:
                self.data[key] = self._build_items(path, tests)
        self.json_data = None

    def set_json(self, tests_root, data):
        # type: (str, Dict[Text, Any]) -> None
        """Install ``data`` as the raw entries; raises ValueError if it is
        not a dict."""
        if not isinstance(data, dict):
            raise ValueError("Got a %s expected a dict" % (type(data)))
        self.tests_root = tests_root
        self.json_data = data

    def to_json(self):
        # type: () -> Dict[Text, Any]
        """Return the JSON form of all entries, loaded and raw.

        When nothing has been instantiated yet the raw dict itself is
        returned (not a copy).
        """
        data = {
            from_os_path(path): sorted(test.to_json() for test in tests)
            for path, tests in iteritems(self.data)
        }

        if self.json_data is not None:
            if not data:
                # avoid copying if there's nothing here yet
                return self.json_data
            data.update(self.json_data)

        return data

    def paths(self):
        # type: () -> Set[Text]
        """Get a list of all paths containing items of this type,
        without actually constructing all the items"""
        rv = set(iterkeys(self.data))
        if self.json_data:
            rv |= {to_os_path(item) for item in iterkeys(self.json_data)}
        return rv
# Base class for ManifestData: plain dict at runtime, the parametrised form
# only when type checking.
if not MYPY:
    ManifestDataType = dict
else:
    ManifestDataType = Dict[Any, TypeData]
class ManifestData(ManifestDataType):
    def __init__(self, manifest):
        # type: (Manifest) -> None
        """Dictionary subclass containing a TypeData instance for each test type,
        keyed by type name"""
        # __setitem__ is only permitted while this flag is False.
        self.initialized = False  # type: bool
        for type_name, item_cls in iteritems(item_classes):
            self[type_name] = TypeData(manifest, item_cls)
        self.initialized = True
        self.json_obj = None  # type: None

    def __setitem__(self, key, value):
        # type: (str, TypeData) -> None
        """Store an entry during construction; raise AttributeError after."""
        if not self.initialized:
            dict.__setitem__(self, key, value)
            return
        raise AttributeError

    def paths(self):
        # type: () -> Set[Text]
        """Get a list of all paths containing test items
        without actually constructing all the items"""
        all_paths = set()  # type: Set[Text]
        for type_data in itervalues(self):
            all_paths.update(type_data.paths())
        return all_paths
class Manifest(object):
    """Per-type test items plus a (hash, type) record for every known path."""

    def __init__(self, tests_root=None, url_base="/"):
        # type: (Optional[str], Text) -> None
        """Create an empty manifest for ``tests_root`` served at ``url_base``."""
        assert url_base is not None
        # rel_path -> (file hash, item type name)
        self._path_hash = {}  # type: Dict[Text, Tuple[Text, Text]]
        self._data = ManifestData(self)  # type: ManifestData
        self.tests_root = tests_root  # type: Optional[str]
        self.url_base = url_base  # type: Text

    def __iter__(self):
        # type: () -> Iterator[Tuple[str, Text, Set[ManifestItem]]]
        """Iterate over (type, path, items) for every item type."""
        return self.itertypes()

    def itertypes(self, *types):
        # type: (*str) -> Iterator[Tuple[str, Text, Set[ManifestItem]]]
        """Yield (type, path, items) for the given types, or for all types in
        sorted order when none are given; paths are sorted within a type."""
        for item_type in (types or sorted(self._data.keys())):
            for path in sorted(self._data[item_type]):
                tests = self._data[item_type][path]
                yield item_type, path, tests

    def iterpath(self, path):
        # type: (Text) -> Iterator[ManifestItem]
        """Yield every item, of any type, recorded for ``path``."""
        for type_tests in self._data.values():
            i = type_tests.get(path, set())
            assert i is not None
            for test in i:
                yield test

    def iterdir(self, dir_name):
        # type: (Text) -> Iterator[ManifestItem]
        """Yield every item whose path lies under the directory ``dir_name``."""
        # Ensure a trailing separator so "foo" does not match "foobar/x".
        if not dir_name.endswith(os.path.sep):
            dir_name = dir_name + os.path.sep
        for type_tests in self._data.values():
            for path, tests in type_tests.iteritems():
                if path.startswith(dir_name):
                    for test in tests:
                        yield test

    def update(self, tree):
        # type: (Iterable[Tuple[Union[SourceFile, bytes], bool]]) -> bool
        """Update the manifest given an iterable of items that make up the updated manifest.

        The iterable must either generate tuples of the form (SourceFile, True) for paths
        that are to be updated, or (path, False) for items that are not to be updated. This
        unusual API is designed as an optimisation meaning that SourceFile items need not be
        constructed in the case we are not updating a path, but the absence of an item from
        the iterator may be used to remove defunct entries from the manifest.

        Returns True if anything in the manifest changed.
        """
        seen_files = set()  # type: Set[Text]

        changed = False

        # Create local variable references to these dicts so we avoid the
        # attribute access in the hot loop below
        path_hash = self._path_hash  # type: Dict[Text, Tuple[Text, Text]]
        data = self._data

        prev_files = data.paths()  # type: Set[Text]

        for source_file, update in tree:
            if not update:
                assert isinstance(source_file, (binary_type, text_type))
                rel_path = source_file  # type: Text
                seen_files.add(rel_path)
                assert rel_path in path_hash
                continue

            assert not isinstance(source_file, bytes)
            rel_path = source_file.rel_path
            seen_files.add(rel_path)

            file_hash = source_file.hash  # type: Text

            is_new = rel_path not in path_hash  # type: bool
            hash_changed = False  # type: bool

            if not is_new:
                old_hash, old_type = path_hash[rel_path]
                if old_hash != file_hash:
                    hash_changed = True

            if is_new or hash_changed:
                new_type, manifest_items = source_file.manifest_items()
                data[new_type][rel_path] = set(manifest_items)
                path_hash[rel_path] = (file_hash, new_type)
                if hash_changed and new_type != old_type:
                    # The stale entry may be absent (e.g. that type's items
                    # were not loaded); treat that like the deletion path
                    # below rather than aborting the update.
                    try:
                        del data[old_type][rel_path]
                    except KeyError:
                        pass
                changed = True

        deleted = prev_files - seen_files
        if deleted:
            changed = True
            for rel_path in deleted:
                if rel_path in path_hash:
                    _, old_type = path_hash[rel_path]
                    del path_hash[rel_path]
                    try:
                        del data[old_type][rel_path]
                    except KeyError:
                        pass
                else:
                    # No record of the type: search every type for the path.
                    for test_data in itervalues(data):
                        if rel_path in test_data:
                            del test_data[rel_path]

        return changed

    def to_json(self):
        # type: () -> Dict[Text, Any]
        """Return the JSON-serialisable form of the manifest; item types with
        no entries are omitted from "items"."""
        out_items = {
            test_type: type_paths.to_json()
            for test_type, type_paths in iteritems(self._data) if type_paths
        }
        rv = {"url_base": self.url_base,
              "paths": {from_os_path(k): v for k, v in iteritems(self._path_hash)},
              "items": out_items,
              "version": CURRENT_VERSION}  # type: Dict[Text, Any]
        return rv

    @classmethod
    def from_json(cls, tests_root, obj, types=None):
        # type: (str, Dict[Text, Any], Optional[Container[Text]]) -> Manifest
        """Build a Manifest from its JSON form.

        Item data is installed raw, without instantiating items. When
        ``types`` is given (and non-empty), only those item types are
        installed.

        Raises ManifestVersionMismatch if the version is not CURRENT_VERSION,
        and ManifestError if "items" or "paths" is missing or an unknown item
        type is present.
        """
        version = obj.get("version")
        if version != CURRENT_VERSION:
            raise ManifestVersionMismatch

        # obj is a mapping, so key membership is the meaningful test here;
        # hasattr(obj, "items") would always be true for a dict.
        if "items" not in obj or "paths" not in obj:
            raise ManifestError("Manifest must contain both 'items' and 'paths'")

        self = cls(tests_root, url_base=obj.get("url_base", "/"))

        self._path_hash = {to_os_path(k): v for k, v in iteritems(obj["paths"])}

        # merge reftest_node and reftest
        # TODO(MANIFESTv8): remove this condition
        if "reftest_node" in obj["items"]:
            for path in obj["items"]["reftest_node"]:
                os_path = to_os_path(path)
                old_hash, old_type = self._path_hash[os_path]
                self._path_hash[os_path] = (old_hash, "reftest")

        for test_type, type_paths in iteritems(obj["items"]):
            # merge reftest_node and reftest
            # TODO(MANIFESTv8): remove this condition
            if test_type in ("reftest", "reftest_node"):
                if types and "reftest" not in types:
                    continue

                if self._data["reftest"].json_data:
                    self._data["reftest"].json_data.update(type_paths)
                else:
                    self._data["reftest"].set_json(tests_root, type_paths)

                continue

            if test_type not in item_classes:
                raise ManifestError

            if types and test_type not in types:
                continue

            self._data[test_type].set_json(tests_root, type_paths)

        return self
def load(tests_root, manifest, types=None):
    # type: (str, Union[IO[bytes], str], Optional[Container[Text]]) -> Optional[Manifest]
    """Load a manifest via _load, after warning that load_and_update is
    the preferred entry point."""
    log = get_logger()
    log.warning("Prefer load_and_update instead")
    return _load(log, tests_root, manifest, types=types)
# Manifests already loaded by _load, keyed by manifest path.
__load_cache = {}  # type: Dict[str, Manifest]


def _load(logger,  # type: Logger
          tests_root,  # type: str
          manifest,  # type: Union[IO[bytes], str]
          types=None,  # type: Optional[Container[Text]]
          allow_cached=True  # type: bool
          ):
    # type: (...) -> Optional[Manifest]
    """Load a manifest from a path or an open file object.

    :param logger: logger used for progress and warning messages
    :param tests_root: tests root passed through to Manifest.from_json
    :param manifest: path to the manifest file, or a file object (whose
                     ``name`` is then used as the cache key)
    :param types: optional container of item types to load
    :param allow_cached: consult and populate the in-process cache
    :returns: the Manifest, or None when ``manifest`` is a path that cannot
              be opened or whose content raises ValueError while loading

    Exceptions raised by Manifest.from_json other than ValueError (e.g.
    ManifestVersionMismatch) propagate to the caller.
    """
    manifest_path = (manifest if isinstance(manifest, string_types)
                     else manifest.name)
    # NOTE(review): the cache is keyed by path only, so a cached manifest is
    # returned regardless of the ``types``/``tests_root`` it was loaded with.
    if allow_cached and manifest_path in __load_cache:
        return __load_cache[manifest_path]

    if isinstance(manifest, string_types):
        if os.path.exists(manifest):
            logger.debug("Opening manifest at %s", manifest)
        else:
            logger.debug("Creating new manifest at %s", manifest)
        try:
            with open(manifest, "rb") as f:
                rv = Manifest.from_json(tests_root,
                                        fast_json.load(f),
                                        types=types)
        except IOError:
            return None
        except ValueError:
            logger.warning("%r may be corrupted", manifest)
            return None
    else:
        rv = Manifest.from_json(tests_root,
                                fast_json.load(manifest),
                                types=types)

    if allow_cached:
        __load_cache[manifest_path] = rv
    return rv
def load_and_update(tests_root,  # type: bytes
                    manifest_path,  # type: bytes
                    url_base,  # type: Text
                    update=True,  # type: bool
                    rebuild=False,  # type: bool
                    metadata_path=None,  # type: Optional[bytes]
                    cache_root=None,  # type: Optional[bytes]
                    working_copy=True,  # type: bool
                    types=None,  # type: Optional[Container[Text]]
                    write_manifest=True,  # type: bool
                    allow_cached=True  # type: bool
                    ):
    # type: (...) -> Manifest
    """Load the manifest at ``manifest_path`` and optionally bring it up to date.

    A fresh manifest is built from scratch when ``rebuild`` is set, when the
    stored manifest cannot be loaded or has a different version, or when its
    url_base differs from ``url_base``. The result is written back only if
    ``write_manifest`` is set and the update changed something.
    ``metadata_path`` is accepted but not used in this function.
    """
    logger = get_logger()

    existing = None
    if not rebuild:
        try:
            existing = _load(logger,
                             tests_root,
                             manifest_path,
                             types=types,
                             allow_cached=allow_cached)
        except ManifestVersionMismatch:
            logger.info("Manifest version changed, rebuilding")

    if existing is not None and existing.url_base != url_base:
        logger.info("Manifest url base did not match, rebuilding")
        existing = None

    if existing is None:
        # Nothing usable on disk: start empty and force a full rebuild.
        manifest = Manifest(tests_root, url_base)
        rebuild = update = True
    else:
        manifest = existing

    if not (rebuild or update):
        return manifest

    tree = vcs.get_tree(tests_root, manifest, manifest_path, cache_root,
                        working_copy, rebuild)
    changed = manifest.update(tree)
    if changed and write_manifest:
        write(manifest, manifest_path)
    tree.dump_caches()

    return manifest
def write(manifest, manifest_path):
    # type: (Manifest, bytes) -> None
    """Serialise ``manifest`` as JSON to ``manifest_path``.

    The parent directory is created if it does not exist. Output uses
    sorted keys and one-space indentation, and ends with a newline.
    """
    dir_name = os.path.dirname(manifest_path)
    # dirname is empty for a bare filename, and makedirs("") would raise.
    if dir_name and not os.path.exists(dir_name):
        os.makedirs(dir_name)

    # Serialise before opening the file so that a failure in to_json() or
    # the encoder does not leave a truncated manifest behind.
    # Use ',' instead of the default ', ' separator to prevent trailing
    # spaces: https://docs.python.org/2/library/json.html#json.dump
    serialized = json.dumps(manifest.to_json(),
                            sort_keys=True, indent=1, separators=(',', ': '))
    # json.dumps returns text on Python 3 but the file is opened in binary
    # mode; the default ensure_ascii output is ASCII, so the bytes are the
    # same on Python 2 and 3.
    if not isinstance(serialized, bytes):
        serialized = serialized.encode("utf-8")

    with open(manifest_path, "wb") as f:
        f.write(serialized)
        f.write(b"\n")