sdk/platform-tools/systrace/catapult/telemetry/telemetry/internal/image_processing/video.py

   1 # Copyright 2014 The Chromium Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 import subprocess
   6
   7 from py_utils import cloud_storage  # pylint: disable=import-error
   8
   9 from telemetry.core import platform
  10 from telemetry.util import image_util
  11 from telemetry.util import rgba_color
  12
# Color of the highlight flash that Video.GetVideoFrameIter searches for in
# order to locate the tab contents within the captured frames.
HIGHLIGHT_ORANGE_FRAME = rgba_color.WEB_PAGE_TEST_ORANGE
  14
  15 class BoundingBoxNotFoundException(Exception):
  16   pass
  17
  18
  19 class Video(object):
  20   """Utilities for storing and interacting with the video capture."""
  21
  22   def __init__(self, video_file_obj):
  23     assert video_file_obj.delete
  24     assert not video_file_obj.close_called
  25     self._video_file_obj = video_file_obj
  26     self._tab_contents_bounding_box = None
  27
  28   def UploadToCloudStorage(self, bucket, target_path):
  29     """Uploads video file to cloud storage.
  30
  31     Args:
  32       target_path: Path indicating where to store the file in cloud storage.
  33     """
  34     cloud_storage.Insert(bucket, target_path, self._video_file_obj.name)
  35
  36   def GetVideoFrameIter(self):
  37     """Returns the iteration for processing the video capture.
  38
  39     This looks for the initial color flash in the first frame to establish the
  40     tab content boundaries and then omits all frames displaying the flash.
  41
  42     Yields:
  43       (time_ms, image) tuples representing each video keyframe. Only the first
  44       frame is a run of sequential duplicate bitmaps is typically included.
  45         time_ms is milliseconds since navigationStart.
  46         image may be a telemetry.core.Bitmap, or a numpy array depending on
  47         whether numpy is installed.
  48     """
  49     frame_generator = self._FramesFromMp4(self._video_file_obj.name)
  50
  51     # Flip through frames until we find the initial tab contents flash.
  52     content_box = None
  53     for _, bmp in frame_generator:
  54       content_box = self._FindHighlightBoundingBox(
  55           bmp, HIGHLIGHT_ORANGE_FRAME)
  56       if content_box:
  57         break
  58
  59     if not content_box:
  60       raise BoundingBoxNotFoundException(
  61           'Failed to identify tab contents in video capture.')
  62
  63     # Flip through frames until the flash goes away and emit that as frame 0.
  64     timestamp = 0
  65     for timestamp, bmp in frame_generator:
  66       if not self._FindHighlightBoundingBox(bmp, HIGHLIGHT_ORANGE_FRAME):
  67         yield 0, image_util.Crop(bmp, *content_box)
  68         break
  69
  70     start_time = timestamp
  71     for timestamp, bmp in frame_generator:
  72       yield timestamp - start_time, image_util.Crop(bmp, *content_box)
  73
  74   def _FindHighlightBoundingBox(self, bmp, color, bounds_tolerance=8,
  75                                 color_tolerance=8):
  76     """Returns the bounding box of the content highlight of the given color.
  77
  78     Raises:
  79       BoundingBoxNotFoundException if the hightlight could not be found.
  80     """
  81     content_box, pixel_count = image_util.GetBoundingBox(bmp, color,
  82         tolerance=color_tolerance)
  83
  84     if not content_box:
  85       return None
  86
  87     # We assume arbitrarily that tabs are all larger than 200x200. If this
  88     # fails it either means that assumption has changed or something is
  89     # awry with our bounding box calculation.
  90     if content_box[2] < 200 or content_box[3] < 200:
  91       raise BoundingBoxNotFoundException('Unexpectedly small tab contents.')
  92
  93     # TODO(tonyg): Can this threshold be increased?
  94     if pixel_count < 0.9 * content_box[2] * content_box[3]:
  95       raise BoundingBoxNotFoundException(
  96           'Low count of pixels in tab contents matching expected color.')
  97
  98     # Since we allow some fuzziness in bounding box finding, we want to make
  99     # sure that the bounds are always stable across a run. So we cache the
 100     # first box, whatever it may be.
 101     #
 102     # This relies on the assumption that since Telemetry doesn't know how to
 103     # resize the window, we should always get the same content box for a tab.
 104     # If this assumption changes, this caching needs to be reworked.
 105     if not self._tab_contents_bounding_box:
 106       self._tab_contents_bounding_box = content_box
 107
 108     # Verify that there is only minor variation in the bounding box. If it's
 109     # just a few pixels, we can assume it's due to compression artifacts.
 110     for x, y in zip(self._tab_contents_bounding_box, content_box):
 111       if abs(x - y) > bounds_tolerance:
 112         # If this fails, it means either that either the above assumption has
 113         # changed or something is awry with our bounding box calculation.
 114         raise BoundingBoxNotFoundException(
 115             'Unexpected change in tab contents box.')
 116
 117     return self._tab_contents_bounding_box
 118
 119   def _FramesFromMp4(self, mp4_file):
 120     host_platform = platform.GetHostPlatform()
 121     if not host_platform.CanLaunchApplication('avconv'):
 122       host_platform.InstallApplication('avconv')
 123
 124     def GetDimensions(video):
 125       proc = subprocess.Popen(['avconv', '-i', video], stderr=subprocess.PIPE)
 126       dimensions = None
 127       output = ''
 128       for line in proc.stderr.readlines():
 129         output += line
 130         if 'Video:' in line:
 131           dimensions = line.split(',')[2]
 132           dimensions = map(int, dimensions.split()[0].split('x'))
 133           break
 134       proc.communicate()
 135       assert dimensions, ('Failed to determine video dimensions. output=%s' %
 136                           output)
 137       return dimensions
 138
 139     def GetFrameTimestampMs(stderr):
 140       """Returns the frame timestamp in integer milliseconds from the dump log.
 141
 142       The expected line format is:
 143       '  dts=1.715  pts=1.715\n'
 144
 145       We have to be careful to only read a single timestamp per call to avoid
 146       deadlock because avconv interleaves its writes to stdout and stderr.
 147       """
 148       while True:
 149         line = ''
 150         next_char = ''
 151         while next_char != '\n':
 152           next_char = stderr.read(1)
 153           line += next_char
 154         if 'pts=' in line:
 155           return int(1000 * float(line.split('=')[-1]))
 156
 157     dimensions = GetDimensions(mp4_file)
 158     frame_length = dimensions[0] * dimensions[1] * 3
 159     frame_data = bytearray(frame_length)
 160
 161     # Use rawvideo so that we don't need any external library to parse frames.
 162     proc = subprocess.Popen(['avconv', '-i', mp4_file, '-vcodec',
 163                              'rawvideo', '-pix_fmt', 'rgb24', '-dump',
 164                              '-loglevel', 'debug', '-f', 'rawvideo', '-'],
 165                             stderr=subprocess.PIPE, stdout=subprocess.PIPE)
 166     while True:
 167       num_read = proc.stdout.readinto(frame_data)
 168       if not num_read:
 169         raise StopIteration
 170       assert num_read == len(frame_data), 'Unexpected frame size: %d' % num_read
 171       yield (GetFrameTimestampMs(proc.stderr),
 172              image_util.FromRGBPixels(dimensions[0], dimensions[1], frame_data))