tools/findit/findit_for_crash.py

   1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 import os
   6 from threading import Lock
   7
   8 import blame
   9 from common import utils
  10 import component_dictionary
  11 import crash_utils
  12 import git_repository_parser
  13 import match_set
  14 import svn_repository_parser
  15
  16
  17 LINE_CHANGE_PRIORITY = 1
  18 FILE_CHANGE_PRIORITY = 2
  19 _THIS_DIR = os.path.abspath(os.path.dirname(__file__))
  20 CONFIG = crash_utils.ParseURLsFromConfig(os.path.join(_THIS_DIR,
  21                                                       'config.ini'))
  22
  23
  24 def GenerateMatchEntry(
  25     matches, revision_info, revision_number, file_path, function,
  26     component_path, component_name, crashed_line_numbers, stack_frame_indices,
  27     file_change_type, repository_parser):
  28   """Generates a match object and adds it to the match set.
  29
  30   Args:
  31     matches: A matchset object, a map from CL to a match object.
  32     revision_info: The revision information, a map from fields (message,
  33                    changed files, etc) to its values.
  34     revision_number: The SVN revision number or git hash.
  35     file_path: The path of the file.
  36     function: The function that caused an crash.
  37     component_path: The path of the component this file is from.
  38     component_name: The name of the component the file is from.
  39     crashed_line_numbers: The list of the lines in the file that caused
  40                           the crash.
  41     stack_frame_indices: The list of positions of this file within a stack.
  42     file_change_type: Whether file is modified, added or deleted.
  43     repository_parser: The parser object to parse line diff.
  44   """
  45   # Check if this CL should be ignored.
  46   with matches.matches_lock:
  47     if revision_number in matches.cls_to_ignore:
  48       return
  49
  50     # If this CL is not already identified as suspected, create a new entry.
  51     if revision_number not in matches.matches:
  52       match = match_set.Match(revision_info, component_name)
  53       message = revision_info['message']
  54       # TODO(jeun): Don't hold lock while issuing http request.
  55       match.ParseMessage(message, matches.codereview_api_url)
  56
  57       # If this match is a revert, add to the set of CLs to be ignored.
  58       if match.is_revert:
  59         matches.cls_to_ignore.add(revision_number)
  60
  61         # If this match has info on what CL it is reverted from, add that CL.
  62         if match.revert_of:
  63           matches.cls_to_ignore.add(match.revert_of)
  64
  65         return
  66
  67       matches.matches[revision_number] = match
  68
  69     else:
  70       match = matches.matches[revision_number]
  71
  72   (diff_url, changed_line_numbers, changed_line_contents) = (
  73       repository_parser.ParseLineDiff(
  74           file_path, component_path, file_change_type, revision_number))
  75
  76   # Ignore this match if the component is not supported for svn.
  77   if not diff_url:
  78     return
  79
  80   # Find the intersection between the lines that this file crashed on and
  81   # the changed lines.
  82   (line_number_intersection, stack_frame_index_intersection, functions) = (
  83       crash_utils.Intersection(
  84           crashed_line_numbers, stack_frame_indices, changed_line_numbers,
  85           function))
  86
  87   # Find the minimum distance between the changed lines and crashed lines.
  88   (min_distance, min_crashed_line, min_changed_line) = \
  89       crash_utils.FindMinLineDistance(crashed_line_numbers,
  90                                       changed_line_numbers)
  91
  92   # Check whether this CL changes the crashed lines or not.
  93   if line_number_intersection:
  94     priority = LINE_CHANGE_PRIORITY
  95   else:
  96     priority = FILE_CHANGE_PRIORITY
  97
  98   # Add the parsed information to the object.
  99   with matches.matches_lock:
 100     match.crashed_line_numbers.append(line_number_intersection)
 101
 102     file_name = file_path.split('/')[-1]
 103     match.changed_files.append(file_name)
 104
 105     # Update the min distance only if it is less than the current one.
 106     if min_distance < match.min_distance:
 107       match.min_distance = min_distance
 108       match.min_distance_info = (file_name, min_crashed_line, min_changed_line)
 109
 110     # If this CL does not change the crashed line, all occurrence of this
 111     # file in the stack has the same priority.
 112     if not stack_frame_index_intersection:
 113       stack_frame_index_intersection = stack_frame_indices
 114       functions = function
 115     match.stack_frame_indices.append(stack_frame_index_intersection)
 116     match.changed_file_urls.append(diff_url)
 117     match.priorities.append(priority)
 118     match.function_list.append(functions)
 119
 120
 121 def FindMatch(revisions_info_map, file_to_revision_info, file_to_crash_info,
 122               component_path, component_name, repository_parser,
 123               codereview_api_url):
 124   """Finds a CL that modifies file in the stacktrace.
 125
 126   Args:
 127     revisions_info_map: A dictionary mapping revision number to the CL
 128                         information.
 129     file_to_revision_info: A dictionary mapping file to the revision that
 130                            modifies it.
 131     file_to_crash_info: A dictionary mapping file to its occurrence in
 132                        stacktrace.
 133     component_path: The path of the component to search for.
 134     component_name: The name of the component to search for.
 135     repository_parser: The parser object to parse the line diff.
 136     codereview_api_url: A code review url to retrieve data from.
 137
 138   Returns:
 139     Matches, a set of match objects.
 140   """
 141   matches = match_set.MatchSet(codereview_api_url)
 142
 143   tasks = []
 144   # Iterate through the crashed files in the stacktrace.
 145   for crashed_file_path in file_to_crash_info:
 146     # Ignore header file.
 147     if crashed_file_path.endswith('.h'):
 148       continue
 149
 150     # If the file in the stacktrace is not changed in any commits, continue.
 151     for changed_file_path in file_to_revision_info:
 152       changed_file_name = changed_file_path.split('/')[-1].lower()
 153       crashed_file_name = crashed_file_path.split('/')[-1].lower()
 154       if changed_file_name != crashed_file_name:
 155         continue
 156
 157       if not crash_utils.GuessIfSameSubPath(
 158           changed_file_path.lower(), crashed_file_path.lower()):
 159         continue
 160
 161       crashed_line_numbers = file_to_crash_info.GetCrashedLineNumbers(
 162           crashed_file_path)
 163       stack_frame_nums = file_to_crash_info.GetCrashStackFrameIndices(
 164           crashed_file_path)
 165       functions = file_to_crash_info.GetCrashFunctions(crashed_file_path)
 166
 167       # Iterate through the CLs that this file path is changed.
 168       for (cl, file_change_type) in file_to_revision_info[changed_file_path]:
 169         # If the file change is delete, ignore this CL.
 170         if file_change_type == 'D':
 171           continue
 172
 173         revision = revisions_info_map[cl]
 174
 175         tasks.append({
 176             'function': GenerateMatchEntry,
 177             'args':[matches, revision, cl, changed_file_path, functions,
 178                     component_path, component_name, crashed_line_numbers,
 179                     stack_frame_nums, file_change_type,
 180                    repository_parser]})
 181
 182   # Run all the tasks.
 183   crash_utils.RunTasks(tasks)
 184
 185   matches.RemoveRevertedCLs()
 186
 187   return matches
 188
 189
 190 def FindMatchForComponent(component_path, file_to_crash_info, changelog,
 191                           callstack_priority, results, results_lock):
 192   """Parses changelog and finds suspected CLs for a given component.
 193
 194   Args:
 195     component_path: The path of component to look for the culprit CL.
 196     file_to_crash_info: A dictionary mapping file to its occurrence in
 197                         stackframe.
 198     changelog: The parsed changelog for this component.
 199     callstack_priority: The priority of this call stack, 0 if from crash stack,
 200                         1 if from freed, 2 if from previously allocated.
 201     results: A dictionary to store the result.
 202     results_lock: A lock that guards results.
 203   """
 204   (repository_parser, component_name, revisions, file_to_revision_map) = \
 205       changelog
 206
 207   # Find match for this component.
 208   codereview_api_url = CONFIG['codereview']['review_url']
 209   component_result = FindMatch(
 210       revisions, file_to_revision_map, file_to_crash_info, component_path,
 211       component_name, repository_parser, codereview_api_url)
 212   matches = component_result.matches
 213
 214   # For all the match results in a dictionary, add to the list so that it
 215   # can be sorted.
 216   with results_lock:
 217     for cl in matches:
 218       match = matches[cl]
 219       results.append((callstack_priority, cl, match))
 220
 221
 222 def FindMatchForCallstack(
 223     callstack, components, component_to_changelog_map, results,
 224     results_lock):
 225   """Finds culprit cl for a stack within a stacktrace.
 226
 227   For each components to look for, create new thread that computes the matches
 228   and join the results at the end.
 229
 230   Args:
 231     callstack: A callstack in a stacktrace to find the result for.
 232     components: A set of components to look for.
 233     component_to_changelog_map: A map from component to its parsed changelog.
 234     results: A list to aggregrate results from all stacktraces.
 235     results_lock: A lock that guards results.
 236   """
 237   # Create component dictionary from the component and call stack.
 238   component_dict = component_dictionary.ComponentDictionary(callstack,
 239                                                             components)
 240   callstack_priority = callstack.priority
 241
 242   # Iterate through all components.
 243   for component_path in component_dict:
 244     # If the component to consider in this callstack is not in the parsed list
 245     # of components, ignore this one.
 246     if component_path not in component_to_changelog_map:
 247       continue
 248
 249     changelog = component_to_changelog_map[component_path]
 250     file_to_crash_info = component_dict.GetFileDict(component_path)
 251     FindMatchForComponent(component_path, file_to_crash_info, changelog,
 252                           callstack_priority, results, results_lock)
 253
 254
 255 def FindMatchForStacktrace(stacktrace, components,
 256                            component_to_regression_dict):
 257   """Finds the culprit CL for stacktrace.
 258
 259   The passed stacktrace is either from release build stacktrace
 260   or debug build stacktrace.
 261
 262   Args:
 263     stacktrace: A list of parsed stacks within a stacktrace.
 264     components: A set of components to look for.
 265     component_to_regression_dict: A dictionary mapping component path to
 266                                   its regression.
 267
 268   Returns:
 269     A list of match results from all stacks.
 270   """
 271   # A list to aggregate results from all the callstacks in the stacktrace.
 272   results = []
 273   results_lock = Lock()
 274
 275   # Setup parsers.
 276   svn_parser = svn_repository_parser.SVNParser(CONFIG['svn'])
 277   git_parser = git_repository_parser.GitParser(component_to_regression_dict,
 278                                                CONFIG['git'])
 279
 280   # Create a cache of parsed revisions.
 281   component_to_changelog_map = {}
 282   for component_path in components:
 283     component_object = component_to_regression_dict[component_path]
 284     range_start = component_object['old_revision']
 285     range_end = component_object['new_revision']
 286
 287     # If range start is 0, the range is too large and the crash has been
 288     # introduced the archived build, so ignore this case.
 289     if range_start == '0':
 290       continue
 291
 292     component_name = component_to_regression_dict[component_path]['name']
 293
 294     is_git = utils.IsGitHash(range_start)
 295     if is_git:
 296       repository_parser = git_parser
 297     else:
 298       repository_parser = svn_parser
 299
 300     (revisions, file_to_revision_map) = repository_parser.ParseChangelog(
 301         component_path, range_start, range_end)
 302
 303     # If the returned map from ParseChangeLog is empty, we don't need to look
 304     # further because either the parsing failed or the changelog is empty.
 305     if not (revisions and file_to_revision_map):
 306       continue
 307
 308     component_to_changelog_map[component_path] = (repository_parser,
 309                                                   component_name,
 310                                                   revisions,
 311                                                   file_to_revision_map)
 312
 313   # Analyze each of the call stacks in the stacktrace.
 314   for callstack in stacktrace.stack_list:
 315     FindMatchForCallstack(callstack, components, component_to_changelog_map,
 316                           results, results_lock)
 317
 318   return results
 319
 320
 321 def SortMatchesFunction(match_with_stack_priority):
 322   """A function to sort the match triple.
 323
 324   Currently, it sorts the list by:
 325   1) The highest priority file change in the CL (changing crashed line is
 326   higher priority than just changing the file).
 327   2) The callstack this match is computed (crash stack, freed, allocation).
 328   3) The minimum stack frame number of the changed file in the match.
 329   4) The number of files this CL changes (higher the better).
 330   5) The minimum distance between the lines that the CL changes and crashed
 331   lines.
 332
 333   Args:
 334     match_with_stack_priority: A match object, with the CL it is from and what
 335                                callstack it is from.
 336
 337   Returns:
 338     A sort key.
 339   """
 340   (stack_priority, _, match) = match_with_stack_priority
 341
 342   return (min(match.priorities),
 343           stack_priority,
 344           match.min_distance,
 345           crash_utils.FindMinStackFrameNumber(match.stack_frame_indices,
 346                                               match.priorities),
 347           -len(match.changed_files))
 348
 349
 350 def SortAndFilterMatches(matches, num_important_frames=5):
 351   """Filters the list of potential culprit CLs to remove noise.
 352
 353   Args:
 354     matches: A list containing match results.
 355     num_important_frames: A number of frames on the top of the frame to Check
 356                           for when filtering the results. A match with a file
 357                           that is in top num_important_frames of the stacktrace
 358                           is regarded more probable then others.
 359
 360   Returns:
 361     Filtered match results.
 362   """
 363   new_matches = []
 364   line_changed = False
 365   is_important_frame = False
 366   highest_priority_stack = crash_utils.INFINITY
 367   matches.sort(key=SortMatchesFunction)
 368   # Iterate through the matches to find out what results are significant.
 369   for stack_priority, cl, match in matches:
 370     # Check if the current match changes crashed line.
 371     is_line_change = (min(match.priorities) == LINE_CHANGE_PRIORITY)
 372
 373     # Check which stack this match is from, and finds the highest priority
 374     # callstack up to this point.
 375     current_stack = stack_priority
 376     if current_stack < highest_priority_stack:
 377       highest_priority_stack = current_stack
 378
 379     # Check if current match changes a file that occurs in crash state.
 380     flattened_stack_frame_indices = [frame for frame_indices in
 381                                      match.stack_frame_indices
 382                                      for frame in frame_indices]
 383     current_is_important = (
 384         min(flattened_stack_frame_indices) < num_important_frames)
 385
 386     # This match and anything lower than this should be ignored if:
 387     # - Current match does not change crashed lines but there are matches
 388     # that do so.
 389     # - Current match is not in crash state but there are matches in it.
 390     # - There are other matches that came from higher priority stack.
 391     if (line_changed and not is_line_change) or (
 392         is_important_frame and not current_is_important) or (
 393             current_stack > highest_priority_stack):
 394       break
 395
 396     # Update the variables.
 397     if is_line_change:
 398       line_changed = True
 399     if current_is_important:
 400       is_important_frame = True
 401
 402     # Add current match to the filtered result.
 403     new_matches.append((stack_priority, cl, match))
 404
 405   return new_matches
 406
 407
 408 def GenerateReasonForMatches(matches):
 409   """Generates a reason that a match (CL) is a culprit cl.
 410
 411   Args:
 412     matches: A list of match objects.
 413   """
 414   # Iterate through the matches in the list.
 415   for i, _, match in matches:
 416     reason = []
 417
 418     # Zip the files in the match by the reason they are suspected
 419     # (how the file is modified).
 420     match_by_priority = zip(
 421         match.priorities, match.crashed_line_numbers, match.changed_files,
 422         match.stack_frame_indices, match.function_list)
 423
 424     # Sort the zipped changed files in the match by their priority so that the
 425     # changed lines comes first in the reason.
 426     match_by_priority.sort(
 427         key=lambda (priority, crashed_line_numbers, file_name,
 428                     stack_frame_indices, function_list): priority)
 429
 430     # Iterate through the sorted match.
 431     for i in range(len(match_by_priority)):
 432       (priority, crashed_line_numbers, file_name, stack_frame_indices,
 433        function_list) = match_by_priority[i]
 434
 435       # If the file in the match is a line change, append a explanation.
 436       if priority == LINE_CHANGE_PRIORITY:
 437         crashed_line_numbers = [crashed_line_number
 438                                 for lines in crashed_line_numbers
 439                                 for crashed_line_number in lines]
 440         reason.append(
 441             'Lines %s of file %s which potentially caused crash '
 442             'are changed in this cl (%s).\n' %
 443             (utils.JoinLineNumbers(crashed_line_numbers, accepted_gap=4),
 444              file_name,
 445              crash_utils.PrettifyFrameInfo(stack_frame_indices, function_list)))
 446
 447       else:
 448         # Get all the files that are not line change.
 449         rest_of_the_files = match_by_priority[i:]
 450
 451         if len(rest_of_the_files) == 1:
 452           file_string = 'File %s is changed in this cl '
 453         else:
 454           file_string = 'Files %s are changed in this cl '
 455
 456         # Create a list of file names, and prettify the list.
 457         file_names = [
 458             file_name for (_, _, file_name, _, _) in rest_of_the_files]
 459         pretty_file_names = crash_utils.PrettifyList(file_names)
 460
 461         # Add the reason, break because we took care of the rest of the files.
 462         file_string += ('(and is part of stack %s)' %
 463             crash_utils.PrettifyFrameInfo(stack_frame_indices, function_list))
 464         reason.append(file_string % pretty_file_names)
 465         break
 466
 467     # Set the reason as string.
 468     match.reason = '\n'.join(reason)
 469
 470
 471 def CombineMatches(matches):
 472   """Combine possible duplicates in matches.
 473
 474   Args:
 475     matches: A list of matches object, along with its callstack priority and
 476              CL it is from.
 477   Returns:
 478     A combined list of matches.
 479   """
 480   combined_matches = []
 481
 482   for stack_index, cl, match in matches:
 483     found_match = None
 484
 485     # Iterate through the list of combined matches.
 486     for _, cl_combined, match_combined in combined_matches:
 487       # Check for if current CL is already higher up in the result.
 488       if cl == cl_combined:
 489         found_match = match_combined
 490         break
 491
 492     # If current match is not already in, add it to the list of matches.
 493     if not found_match:
 494       combined_matches.append((stack_index, cl, match))
 495       continue
 496
 497     # Combine the reason if the current match is already in there.
 498     found_match.reason += match.reason
 499     if match.min_distance < found_match.min_distance:
 500       found_match.min_distance = match.min_distance
 501       found_match.min_distance_info = match.min_distance_info
 502
 503   for stack_index, cl, match in combined_matches:
 504     if match.min_distance_info:
 505       file_name, min_crashed_line, min_changed_line = match.min_distance_info
 506       match.reason += \
 507           ('\nMinimum distance from crash line to modified line: %d. '
 508            '(file: %s, crashed on: %d, modified: %d).\n' %
 509            (match.min_distance, file_name, min_crashed_line, min_changed_line))
 510
 511   return combined_matches
 512
 513
 514 def FilterAndGenerateReasonForMatches(result):
 515   """A wrapper function.
 516
 517   It generates reasons for the matches and returns string representation
 518   of filtered results.
 519
 520   Args:
 521     result: A list of match objects.
 522
 523   Returns:
 524     A string representation of filtered results.
 525   """
 526   new_result = SortAndFilterMatches(result)
 527   GenerateReasonForMatches(new_result)
 528   combined_matches = CombineMatches(new_result)
 529   return crash_utils.MatchListToResultList(combined_matches)
 530
 531
 532 def ParseCrashComponents(main_stack):
 533   """Parses the crashing component.
 534
 535   Crashing components is a component that top_n_frames of the stacktrace is
 536   from.
 537
 538   Args:
 539     main_stack: Main stack from the stacktrace.
 540
 541   Returns:
 542     A set of components.
 543   """
 544   components = set()
 545
 546   for frame in main_stack.frame_list:
 547     components.add(frame.component_path)
 548
 549   return components
 550
 551
 552 def GenerateAndFilterBlameList(callstack, component_to_crash_revision_dict,
 553                                component_to_regression_dict):
 554   """A wrapper function.
 555
 556   Finds blame information for stack and returns string representation.
 557
 558   Args:
 559     callstack: A callstack to find the blame information.
 560     component_to_crash_revision_dict: A dictionary mapping component to its
 561                                       crash revision.
 562     component_to_regression_dict: A dictionary mapping component to its
 563                                   regression.
 564
 565   Returns:
 566     A list of blame results.
 567   """
 568   if component_to_regression_dict:
 569     parsed_deps = component_to_regression_dict
 570   else:
 571     parsed_deps = component_to_crash_revision_dict
 572
 573   # Setup parser objects to use for parsing blame information.
 574   svn_parser = svn_repository_parser.SVNParser(CONFIG['svn'])
 575   git_parser = git_repository_parser.GitParser(parsed_deps, CONFIG['git'])
 576   parsers = {}
 577   parsers['svn'] = svn_parser
 578   parsers['git'] = git_parser
 579
 580   # Create and generate the blame objects from the callstack.
 581   blame_list = blame.BlameList()
 582   blame_list.FindBlame(callstack, component_to_crash_revision_dict,
 583                        component_to_regression_dict,
 584                        parsers)
 585
 586   blame_list.FilterAndSortBlameList()
 587   return crash_utils.BlameListToResultList(blame_list)
 588
 589
 590 def FindItForCrash(stacktrace_list,
 591                    callstack,
 592                    component_to_regression_dict,
 593                    component_to_crash_revision_dict):
 594   """Finds the culprit CL from the list of stacktrace.
 595
 596   Args:
 597     stacktrace_list: A list of stacktraces to look for, in the order of
 598                      decreasing significance.
 599     callstack: A callstack object to show blame information for, if there are
 600                no results for all stacktraces in the stacktrace_list.
 601     component_to_regression_dict: A parsed regression information as a
 602                                   result of parsing DEPS file.
 603     component_to_crash_revision_dict: A parsed crash revision information.
 604
 605   Returns:
 606     A list of result objects, with the message how the result is created.
 607   """
 608   # If regression information is not available, return blame information.
 609   if not component_to_regression_dict:
 610     result = GenerateAndFilterBlameList(callstack,
 611                                         component_to_crash_revision_dict,
 612                                         component_to_regression_dict)
 613     if result:
 614       return_message = (
 615           'Regression information is not available. The result is '
 616           'the blame information.')
 617     else:
 618       return_message = ('Findit could not find any suspected CLs.')
 619
 620     return (return_message, result)
 621
 622   for stacktrace in stacktrace_list:
 623     # Check the next stacktrace if current one is empty.
 624     if not stacktrace.stack_list:
 625       continue
 626
 627     # Get the crash stack for this stacktrace, and extract crashing components
 628     # from it.
 629     main_stack = stacktrace.GetCrashStack()
 630     components = ParseCrashComponents(main_stack)
 631
 632     result_for_stacktrace = FindMatchForStacktrace(
 633         stacktrace, components, component_to_regression_dict)
 634     filtered_result = FilterAndGenerateReasonForMatches(result_for_stacktrace)
 635
 636     # If the result is empty, check the next stacktrace. Else, return the
 637     # filtered result.
 638     if not filtered_result:
 639       continue
 640
 641     return_message = (
 642         'The result is a list of CLs that change the crashed files.')
 643     return (return_message, filtered_result)
 644
 645   # If no match is found, return the blame information for the input
 646   # callstack.
 647   result = GenerateAndFilterBlameList(
 648       callstack, component_to_crash_revision_dict,
 649       component_to_regression_dict)
 650
 651   if result:
 652     return_message = (
 653         'No CL in the regression range changes the crashed files. '
 654         'The result is the blame information.')
 655
 656   # When findit could not find any CL that changes file in stacktrace or if
 657   # if cannot get any blame information, return a message saying that no
 658   # results are available.
 659   else:
 660     return_message = ('Findit could not find any suspected CLs.')
 661
 662   return (return_message, result)
 663