1 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
14 from common
import utils
15 from result
import Result
18 INFINITY
= float('inf')
20 MAX_THREAD_NUMBER
= 10
24 def SignalWorkerThreads():
29 for i
in range(MAX_THREAD_NUMBER
):
32 # Give worker threads a chance to exit.
33 # Work around the harmless bug in python 2.7 below.
37 atexit
.register(SignalWorkerThreads
)
44 task
= TASK_QUEUE
.get()
48 # According to http://bugs.python.org/issue14623, this is a harmless bug
49 # in python 2.7 which won't be fixed.
50 # The exception is raised on daemon threads when python interpreter is
54 function
, args
, kwargs
, result_semaphore
= task
56 function(*args
, **kwargs
)
60 # Signal one task is done in case of exception.
61 result_semaphore
.release()
65 """Run given tasks. Not thread-safe: no concurrent calls of this function.
67 Return after all tasks were completed. A task is a dict as below:
69 'function': the function to call,
70 'args': the positional argument to pass to the function,
71 'kwargs': the key-value arguments to pass to the function,
79 TASK_QUEUE
= Queue
.Queue()
80 for index
in range(MAX_THREAD_NUMBER
):
81 thread
= threading
.Thread(target
=Worker
, name
='worker_%s' % index
)
82 # Set as daemon, so no join is needed.
86 result_semaphore
= threading
.Semaphore(0)
87 # Push task to task queue for execution.
90 (task
['function'], task
.get('args', []),
91 task
.get('kwargs', {}), result_semaphore
))
93 # Wait until all tasks have been executed.
95 result_semaphore
.acquire()
def GetRepositoryType(revision_number):
  """Tells which kind of repository a revision identifier belongs to.

  Args:
    revision_number: A revision number or git hash.

  Returns:
    'git' when utils.IsGitHash accepts the identifier, 'svn' otherwise.
  """
  return 'git' if utils.IsGitHash(revision_number) else 'svn'
113 def ParseURLsFromConfig(file_name
):
114 """Parses URLS from the config file.
116 The file should be in python config format, where svn section is in the
117 format "svn:component_path".
118 Each svn section should contain changelog_url, revision_url,
119 diff_url and blame_url.
122 file_name: The name of the file that contains URL information.
125 A dictionary that maps repository type to list of URLs. For svn, it maps
126 key 'svn' to another dictionary, which maps component path to the URLs
127 as explained above. For git, it maps to the URLs as explained above.
129 config
= ConfigParser
.ConfigParser()
131 # Get the absolute path of the config file, and read the file. If it fails,
133 config_file_path
= os
.path
.join(os
.path
.abspath(os
.path
.dirname(__file__
)),
135 config
.read(config_file_path
)
139 # Iterate through the config file, check for sections.
141 for section
in config
.sections():
142 # These two do not need another layer of dictionary, so add it and go
144 if ':' not in section
:
145 for option
in config
.options(section
):
146 if section
not in config_dict
:
147 config_dict
[section
] = {}
149 url
= config
.get(section
, option
)
150 config_dict
[section
][option
] = url
154 # Get repository type and component name from the section name.
155 repository_type_and_component
= section
.split(':')
156 repository_type
= repository_type_and_component
[0]
157 component_path
= repository_type_and_component
[1]
159 # Add 'svn' as the key, if it is not already there.
160 if repository_type
not in config_dict
:
161 config_dict
[repository_type
] = {}
162 url_map_for_repository
= config_dict
[repository_type
]
164 # Add the path to the 'svn', if it is not already there.
165 if component_path
not in url_map_for_repository
:
166 url_map_for_repository
[component_path
] = {}
167 type_to_url
= url_map_for_repository
[component_path
]
169 # Add all URLs to this map.
170 for option
in config
.options(section
):
171 url
= config
.get(section
, option
)
172 type_to_url
[option
] = url
def NormalizePath(path, parsed_deps):
  """Maps a file path onto the component whose path occurs in it.

  Args:
    path: A string representing a path.
    parsed_deps: A map from component path to a dict describing the
        component; only the 'name' entry is read here.

  Returns:
    A tuple (component_path, component_name, normalized_path), where
    normalized_path is the part of the path that follows the first occurrence
    of the component path. When no component path occurs in the path, the
    tuple is ('src/', 'chromium', <the whole normalized path>).
  """
  # Collapse redundant separators and up-level references, then use '/' as
  # the only separator.
  normalized_path = os.path.normpath(path).replace('\\', '/')

  # Longest component paths are tried first, so that a nested component wins
  # over a component whose path is a prefix of it.
  for component_path in sorted(parsed_deps, key=lambda dep: -len(dep)):
    # Drop one leading 'src/', unless the component path is exactly 'src/'.
    stripped = component_path
    if stripped != 'src/' and stripped.startswith('src/'):
      stripped = stripped[len('src/'):]

    # The lowercased spelling is tried as well, because syzyasan builds
    # return lowercased file paths.
    lowered = stripped.lower()

    if stripped not in normalized_path and lowered not in normalized_path:
      continue

    # The lowercased spelling takes precedence when it occurs in the path.
    matched = lowered if lowered in normalized_path else stripped

    # Keep only what follows the first occurrence of the component path.
    normalized_path = normalized_path.split(matched, 1)[1]

    # Blink special case: put the second-to-last '/'-separated piece of the
    # matched component path back in front of the remainder.
    # NOTE(review): 'lowered' has already lost one leading 'src/', so this
    # comparison only holds for component paths such as 'src/src/...' --
    # confirm that this is intended.
    if (lowered == 'src/third_party/webkit/source' and
        not normalized_path.lower().startswith('source/')):
      normalized_path = matched.split('/')[-2] + '/' + normalized_path

    return (component_path, parsed_deps[component_path]['name'],
            normalized_path)

  # If the path does not match any component, default to chromium.
  return ('src/', 'chromium', normalized_path)
def SplitRange(regression):
  """Splits a revision range as retrieved from clusterfuzz.

  Args:
    regression: A string in format 'r1234:r5678'. May be None or empty when
        no regression information is available.

  Returns:
    A list with the start and the end of the range as strings, each with one
    leading 'r' removed, for example ['1234', '5678']. None if the input is
    None or empty, or does not consist of exactly two ':'-separated parts.
  """
  # No regression information at all.
  if not regression:
    return None

  revisions = regression.split(':')

  # If regression information is not available, return none.
  if len(revisions) != 2:
    return None

  # Strip a single leading 'r' off the range start/end. lstrip('r') is not
  # used because it would remove every leading 'r', not just the marker.
  return [revision[1:] if revision.startswith('r') else revision
          for revision in revisions]
def LoadJSON(json_string):
  """Loads json object from string, or None.

  Args:
    json_string: A string to get object from.

  Returns:
    The decoded value if json_string is valid JSON. None if it cannot be
    decoded, including when json_string is not a string at all (e.g. None).
  """
  try:
    return json.loads(json_string)
  except (TypeError, ValueError):
    # ValueError: malformed JSON. TypeError: input is not a string.
    return None
def GetDataFromURL(url, retries=10):
  """Retrieves raw data from URL.

  Args:
    url: URL to get data from.
    retries: Number of times to retry connection; passed through to the HTTP
        client's Get(). Defaults to 10.

  Returns:
    The raw data if the status code is 200, None otherwise.
  """
  status_code, data = utils.GetHttpClient().Get(url, retries=retries)

  # Return None if it fails to read data.
  return data if status_code == 200 else None
308 def FindMinLineDistance(crashed_line_list
, changed_line_numbers
,
310 """Calculates how far the changed line is from one of the crashes.
312 Finds the minimum distance between the lines that the file crashed on
313 and the lines that the file changed. For example, if the file crashed on
314 line 200 and the CL changes line 203,204 and 205, the function returns 3.
317 crashed_line_list: A list of lines that the file crashed on.
318 changed_line_numbers: A list of lines that the file changed.
319 line_range: Number of lines to look back for.
322 The minimum distance. If either of the input lists is empty,
326 min_distance
= INFINITY
330 crashed_line_numbers
= set()
331 for crashed_line_range
in crashed_line_list
:
332 for crashed_line
in crashed_line_range
:
333 for line
in range(crashed_line
- line_range
, crashed_line
+ 1):
334 crashed_line_numbers
.add(line
)
336 for line
in crashed_line_numbers
:
337 for distance
in changed_line_numbers
:
338 # Find the current distance and update the min if current distance is
339 # less than current min.
340 current_distance
= abs(line
- distance
)
341 if current_distance
< min_distance
:
342 min_distance
= current_distance
344 changed_line
= distance
346 return (min_distance
, crashed_line
, changed_line
)
def GuessIfSameSubPath(path1, path2):
  """Guesses if two paths represent same path.

  Splits both paths on '/' and counts the folder names they have in common,
  regardless of position. A name that occurs several times in both paths is
  counted once per shared occurrence, so identical paths with repeated folder
  names (e.g. 'a/a') are recognized as the same path.

  Args:
    path1: First path to compare.
    path2: Second path to compare.

  Returns:
    True if the number of shared names is at least 3, or at least the number
    of parts of the shorter path when that is less than 3. False otherwise.
  """
  from collections import Counter

  folders1 = path1.split('/')
  folders2 = path2.split('/')

  # Multiset intersection keeps min(count in path1, count in path2) per name.
  shared = sum((Counter(folders1) & Counter(folders2)).values())
  return shared >= min(3, len(folders1), len(folders2))
def FindMinStackFrameNumber(stack_frame_indices, priorities):
  """Finds the minimum stack number, from the list of stack numbers.

  Only the entries whose priority equals the smallest priority number are
  considered.

  Args:
    stack_frame_indices: A list of lists containing stack position.
    priorities: A list of priority for each file; entry i belongs to
        stack_frame_indices[i].

  Returns:
    Inf if either input is empty or the selected entries contain no stack
    position, minimum stack number otherwise.
  """
  # min() below would raise on an empty list; nothing to look at means inf.
  if not stack_frame_indices or not priorities:
    return INFINITY

  # The highest priority is the lowest priority number.
  highest_priority = min(priorities)

  # Gather the stack frame numbers of every file with the highest priority.
  flattened = []
  for i, priority in enumerate(priorities):
    if priority == highest_priority:
      flattened.extend(stack_frame_indices[i])

  # If no stack frame information is available, return inf. Else, return min.
  if not flattened:
    return INFINITY
  return min(flattened)
def AddHyperlink(text, link):
  """Returns a string with HTML link tag.

  Both arguments are HTML-escaped before they are put into the tag.

  Args:
    text: A value to show as the link text; converted with str().
    link: A link to add to the string.

  Returns:
    A string of the form '<a href="LINK">TEXT</a>'.
  """
  # cgi.escape was removed in Python 3.8; html.escape is its replacement.
  try:
    from html import escape
  except ImportError:
    from cgi import escape

  # Quotes must be escaped in the attribute value, but not in the link text.
  sanitized_link = escape(link, quote=True)
  sanitized_text = escape(str(text), quote=False)
  return '<a href="%s">%s</a>' % (sanitized_link, sanitized_text)
def PrettifyList(items):
  """Formats a list as comma-separated text without the brackets.

  Args:
    items: A list to prettify.

  Returns:
    The str() of every element, joined with ', '.
  """
  pieces = [str(item) for item in items]
  return ', '.join(pieces)
def PrettifyFrameInfo(frame_indices, functions):
  """Describes each frame as 'frame #INDEX, "FUNCTION"', joined with '; '.

  Frame indices and functions are paired up in order. Only the part of each
  function before its first '(' is shown.
  """
  return '; '.join(
      'frame #%s, "%s"' % (frame_index, function.split('(')[0])
      for frame_index, function in zip(frame_indices, functions))
434 def PrettifyFiles(file_list
):
435 """Returns a string representation of a list of file names.
438 file_list: A list of tuple, (file_name, file_url).
440 A string representation of file names with their urls.
443 for file_name
, file_url
in file_list
:
444 ret
.append(' %s\n' % AddHyperlink(file_name
, file_url
))
448 def Intersection(crashed_line_list
, stack_frame_index
, changed_line_numbers
,
449 function
, line_range
=3):
450 """Finds the overlap between changed lines and crashed lines.
452 Finds the intersection of the lines that caused the crash and
453 lines that the file changes. The intersection looks within 3 lines
454 of the line that caused the crash.
457 crashed_line_list: A list of lines that the file crashed on.
458 stack_frame_index: A list of positions in stack for each of the lines.
459 changed_line_numbers: A list of lines that the file changed.
460 function: A list of functions that the file crashed on.
461 line_range: Number of lines to look backwards from crashed lines.
464 line_number_intersection: Intersection between crashed_line_list and
465 changed_line_numbers.
466 stack_frame_index_intersection: Stack number for each of the intersections.
468 line_number_intersection
= []
469 stack_frame_index_intersection
= []
470 function_intersection
= []
472 # Iterate through the crashed lines, and their occurrence in stack.
473 for (lines
, stack_frame_index
, function_name
) in zip(
474 crashed_line_list
, stack_frame_index
, function
):
475 # Also check previous 'line_range' lines. Create a set of all changed lines
476 # and lines within 3 lines range before the crashed line.
479 for line_in_range
in range(line
- line_range
, line
+ 1):
480 line_minus_n
.add(line_in_range
)
482 for changed_line
in changed_line_numbers
:
483 # If a CL does not change the crashed line, check the next line.
484 if changed_line
not in line_minus_n
:
487 intersected_line
= set()
488 # If the changed line is exactly the crashed line, add that line.
490 if line
in changed_line_numbers
:
491 intersected_line
.add(line
)
493 # If the changed line is in 3 lines of the crashed line, add the line.
495 intersected_line
.add(changed_line
)
497 # Avoid adding the same line twice.
498 if intersected_line
not in line_number_intersection
:
499 line_number_intersection
.append(list(intersected_line
))
500 stack_frame_index_intersection
.append(stack_frame_index
)
501 function_intersection
.append(function_name
)
504 return (line_number_intersection
, stack_frame_index_intersection
,
505 function_intersection
)
508 def MatchListToResultList(matches
):
509 """Convert list of matches to the list of result objects.
512 matches: A list of match objects along with its stack priority and revision
515 A list of result object.
520 for _
, cl
, match
in matches
:
522 revision_url
= match
.revision_url
523 component_name
= match
.component_name
524 author
= match
.author
525 reason
= match
.reason
.strip()
526 review_url
= match
.review_url
527 reviewers
= match
.reviewers
528 # For matches, line content do not exist.
530 message
= match
.message
533 result
= Result(suspected_cl
, revision_url
, component_name
, author
, reason
,
534 review_url
, reviewers
, line_content
, message
, time
)
535 result_list
.append(result
)
540 def BlameListToResultList(blame_list
):
541 """Convert blame list to the list of result objects.
544 blame_list: A list of blame objects.
547 A list of result objects.
551 for blame
in blame_list
:
552 suspected_cl
= blame
.revision
553 revision_url
= blame
.url
554 component_name
= blame
.component_name
555 author
= blame
.author
557 'The CL last changed line %s of file %s, which is stack frame %d.' %
558 (blame
.line_number
, blame
.file, blame
.stack_frame_index
))
560 # Blame object does not have review url and reviewers.
563 line_content
= blame
.line_content
564 message
= blame
.message
566 result
= Result(suspected_cl
, revision_url
, component_name
, author
, reason
,
567 review_url
, reviewers
, line_content
, message
, time
)
568 result_list
.append(result
)