#!/usr/bin/python
# Name: neopi.py
# Description: Utility to scan a file path for encrypted and obfuscated files
# Authors: Ben Hagen (ben.hagen@neohapsis.com)
#          Scott Behrens (scott.behrens@neohapsis.com)
#
# Date: 11/4/2010
#
# pep-0008 - Is stupid. TABS FO'EVER! too bad, spaces are back!
#
# Based on the post-image of commit 128e04363f4371094e3376d81ce100818e9b8a82
# ("Update neopi.py", Scott Behrens, 2011-12-19).
#
# Runs unchanged on Python 2.7 and Python 3: every print() call takes a single
# argument, and file contents are handled as byte strings throughout.

# Try catch regular expressions/bad path/bad filename/bad regex/

# Library imports
import math
import sys
import os
import re
import csv
import zlib
import time
from collections import Counter
from collections import defaultdict
from optparse import OptionParser

# Marks "no previous value seen yet" so that a genuine value of 0 is not
# mistaken for the start of the list when ranking.
_UNSET = object()

# time.clock() was removed in Python 3.8; keep it where perf_counter is missing.
_timer = getattr(time, "perf_counter", None) or time.clock

# NOTE(review): the spacing between banner tokens was reconstructed; confirm
# against the upstream file if the exact artwork matters.
_BANNER = r"""
      )         (   (
   ( /(         )\ ))\ )
   )\())  (    (()/(()/(
  ((_)\  ))\  ( /(_))(_))
   _((_)/((_))\(_))(_))
  | \| (_)) ((_) _ \_ _|
  | .` / -_) _ \ _/| |
  |_|\_\___\___/_| |___| Ver. *.USEGIT
 """


def _as_bytes(data):
    """Return *data* as a byte string; text input is encoded as UTF-8."""
    if isinstance(data, bytes):
        return data
    if isinstance(data, bytearray):
        return bytes(data)
    return data.encode("utf-8")


def _byte_histogram(data):
    """Return a 256-entry list holding how often each byte value occurs."""
    tally = Counter(bytearray(data))
    return [tally[value] for value in range(256)]


def resultsAddRank(results):
    """Add a "rank" key to every result dict and return them as a new list.

    *results* must already be sorted.  Equal values share a rank and the next
    distinct value takes its 1-based position (1, 1, 3, ...).  The dicts are
    updated in place; the returned list is a new list of the same dicts.
    """
    rank = 1
    previous = _UNSET
    ranked = []
    for position, entry in enumerate(results, 1):
        # Compare against a sentinel, not truthiness: a previous value of 0
        # is a real value and must still allow the rank to advance.
        if previous is not _UNSET and previous != entry["value"]:
            rank = position
        entry["rank"] = rank
        ranked.append(entry)
        previous = entry["value"]
    return ranked


class _RankedTest(object):
    """Bookkeeping shared by all tests: result storage, ranking, printing.

    Subclasses set ``title`` (header with one ``%i`` slot), ``row_format``
    (``str.format`` template taking value then filename) and ``descending``
    (True when the largest value is the most suspicious).
    """

    title = ""
    row_format = ' {0:>7} {1}'
    descending = True

    def __init__(self):
        """Instantiate the results array."""
        self.results = []

    def _record(self, filename, value):
        """Store one measurement and hand the value back to the caller."""
        self.results.append({"filename": filename, "value": value})
        return value

    def sort(self):
        """Order the results most-suspicious first and attach ranks."""
        self.results.sort(key=lambda item: item["value"])
        if self.descending:
            # sort-then-reverse (not reverse=True) keeps the original
            # ordering of files whose values tie.
            self.results.reverse()
        self.results = resultsAddRank(self.results)

    def printer(self, count):
        """Print the header and at most *count* leading results."""
        print(self.title % (count))
        for entry in self.results[:max(count, 0)]:
            print(self.row_format.format(entry["value"], entry["filename"]))
        return


class LanguageIC(_RankedTest):
    """Class that calculates a file's Index of Coincidence as well as the
    Index of Coincidence over all files seen so far.
    """

    title = "\n[[ Top %i lowest IC files ]]"
    row_format = ' {0:>7.4f} {1}'
    descending = False

    def __init__(self):
        """Initialize results arrays as well as character counters."""
        _RankedTest.__init__(self)
        self.char_count = defaultdict(int)
        self.total_char_count = 0
        self.ic_total_results = ""

    def _absorb(self, histogram):
        """Fold one file's byte histogram into the running totals."""
        for value, occurrences in enumerate(histogram):
            self.char_count[chr(value)] += occurrences
            self.total_char_count += occurrences

    def calculate_char_count(self, data):
        """Add the character counts of *data* to the running totals."""
        if not data:
            return 0
        self._absorb(_byte_histogram(_as_bytes(data)))
        return

    def calculate_IC(self):
        """Store the IC of everything counted so far in ic_total_results."""
        coincidences = sum(n * (n - 1) for n in self.char_count.values())
        pairs = self.total_char_count * (self.total_char_count - 1)
        # Fewer than two characters: no pairs exist, report 0.
        self.ic_total_results = float(coincidences) / pairs if pairs else 0
        return

    def calculate(self, data, filename):
        """Calculate the IC for a file, record it, and return it.

        Returns 0 for empty data.  Input of a single byte has no character
        pairs, so its IC is recorded as 0.0 (the same fallback calculate_IC
        uses) instead of raising ZeroDivisionError.
        """
        if not data:
            return 0
        histogram = _byte_histogram(_as_bytes(data))
        total = sum(histogram)
        coincidences = sum(n * (n - 1) for n in histogram)
        pairs = total * (total - 1)
        ic = float(coincidences) / pairs if pairs else 0.0
        self.results.append({"filename": filename, "value": ic})
        # Keep the cross-file totals used for the average IC up to date.
        self._absorb(histogram)
        return ic

    def printer(self, count):
        """Print the average IC, then the *count* lowest-IC files."""
        self.calculate_IC()
        print("\n[[ Average IC for Search ]]")
        print(self.ic_total_results)
        _RankedTest.printer(self, count)
        return


class Entropy(_RankedTest):
    """Class that calculates a file's Entropy."""

    title = "\n[[ Top %i entropic files for a given search ]]"
    row_format = ' {0:>7.4f} {1}'

    def calculate(self, data, filename):
        """Calculate the Shannon entropy (bits per byte) of *data*.

        Space characters are removed first.  Returns 0 for empty data; data
        made only of spaces is recorded with entropy 0 rather than raising
        ZeroDivisionError.
        """
        if not data:
            return 0
        self.stripped_data = _as_bytes(data).replace(b' ', b'')
        size = len(self.stripped_data)
        entropy = 0
        if size:
            for occurrences in _byte_histogram(self.stripped_data):
                if occurrences:
                    p_x = float(occurrences) / size
                    entropy += - p_x * math.log(p_x, 2)
        return self._record(filename, entropy)


class LongestWord(_RankedTest):
    """Class that determines the longest word for a particular file."""

    title = "\n[[ Top %i longest word files ]]"
    # NOTE(review): the commas are literal members of the character class, so
    # words are split on "," as well as whitespace - confirm that is intended.
    _SEPARATORS = re.compile(br"[\s,\n,\r]")

    def calculate(self, data, filename):
        """Record and return the length of the longest word in *data*.

        Returns 0 for empty data.
        """
        if not data:
            return 0
        words = self._SEPARATORS.split(_as_bytes(data))
        return self._record(filename, max(len(word) for word in words))


class SignatureNasty(_RankedTest):
    """Test that counts occurrences of nasty expressions in a file."""

    title = "\n[[ Top %i signature match counts ]]"
    # Lots taken from the wonderful post at
    # http://stackoverflow.com/questions/3115559/exploitable-php-functions
    _PATTERN = re.compile(
        br'(eval\(|file_put_contents|base64_decode|python_eval|exec\(|passthru|popen|proc_open|pcntl|assert\(|system\(|shell)',
        re.I)

    def calculate(self, data, filename):
        """Record and return the number of signature matches in *data*.

        Returns 0 for empty data.
        """
        if not data:
            return 0
        matches = self._PATTERN.findall(_as_bytes(data))
        return self._record(filename, len(matches))


class UsesEval(_RankedTest):
    """Test that counts eval() calls whose argument is a variable."""

    title = "\n[[ Top %i eval match counts ]]"
    _PATTERN = re.compile(br'(eval\(\$(\w|\d))', re.I)

    def calculate(self, data, filename):
        """Record and return the number of eval($var matches in *data*.

        Returns 0 for empty data.
        """
        if not data:
            return 0
        matches = self._PATTERN.findall(_as_bytes(data))
        return self._record(filename, len(matches))


class Compression(_RankedTest):
    """Test that measures how well a file compresses with zlib."""

    title = "\n[[ Top %i compression match counts ]]"
    row_format = ' {0:>7.4f} {1}'

    def calculate(self, data, filename):
        """Record and return compressed size divided by original size.

        Returns 0 for empty data.
        """
        if not data:
            return 0
        raw = _as_bytes(data)
        ratio = float(len(zlib.compress(raw))) / float(len(raw))
        return self._record(filename, ratio)


class SearchFile(object):
    """Walks a directory tree and yields the contents of matching files."""

    def search_file_path(self, args, valid_regex):
        """Yield (data, filename) for each file under args[0].

        Only files whose bare name matches *valid_regex* and whose size is
        above 60 bytes are considered.  *data* is the raw file contents, or
        False when the file could not be inspected or read (a message is
        printed in that case).
        """
        for root, dirs, files in os.walk(args[0]):
            for file in files:
                if not valid_regex.search(file):
                    continue
                filename = os.path.join(root, file)
                try:
                    if os.path.getsize(filename) <= 60:
                        continue
                    with open(filename, 'rb') as handle:
                        data = handle.read()
                except (IOError, OSError):
                    # Unreadable file or dangling symlink: report and go on.
                    data = False
                    print("Could not read file :: %s/%s" % (root, file))
                yield data, filename


def _open_csv(path):
    """Open *path* for csv.writer in the mode each Python major version needs."""
    if sys.version_info[0] >= 3:
        return open(path, "w", newline="")
    return open(path, "wb")


def _build_parser():
    """Create the command-line option parser."""
    # Positional arguments: args[0] is the start directory, args[1] an
    # optional filename regular expression.
    parser = OptionParser(usage="usage: %prog [options] ",
                          version="%prog 1.0")
    parser.add_option("-c", "--csv",
                      action="store",
                      dest="is_csv",
                      default=False,
                      help="generate CSV outfile",
                      metavar="FILECSV")
    parser.add_option("-a", "--all",
                      action="store_true",
                      dest="is_all",
                      default=False,
                      help="Run all (useful) tests [Entropy, Longest Word, IC, Signature]",)
    parser.add_option("-z", "--zlib",
                      action="store_true",
                      dest="is_zlib",
                      default=False,
                      help="Run compression Test",)
    parser.add_option("-e", "--entropy",
                      action="store_true",
                      dest="is_entropy",
                      default=False,
                      help="Run entropy Test",)
    parser.add_option("-E", "--eval",
                      action="store_true",
                      dest="is_eval",
                      default=False,
                      help="Run signiture test for the eval",)
    parser.add_option("-l", "--longestword",
                      action="store_true",
                      dest="is_longest",
                      default=False,
                      help="Run longest word test",)
    parser.add_option("-i", "--ic",
                      action="store_true",
                      dest="is_ic",
                      default=False,
                      help="Run IC test",)
    parser.add_option("-s", "--signature",
                      action="store_true",
                      dest="is_signature",
                      default=False,
                      help="Run signature test",)
    parser.add_option("-A", "--auto",
                      action="store_true",
                      dest="is_auto",
                      default=False,
                      help="Run auto file extension tests",)
    parser.add_option("-u", "--unicode",
                      action="store_true",
                      dest="ignore_unicode",
                      default=False,
                      help="Skip over unicode-y/UTF'y files",)
    return parser


def main(argv=None):
    """Parse all the options, scan the tree, and print the ranked reports.

    *argv* defaults to sys.argv[1:].
    """
    timeStart = _timer()

    print(_BANNER)

    parser = _build_parser()
    (options, args) = parser.parse_args(argv)

    # Error on invalid number of arguments
    if len(args) < 1:
        parser.print_help()
        print("")
        sys.exit()

    # Error on an invalid path
    if not os.path.exists(args[0]):
        parser.error("Invalid path")

    # --auto overrides any user-supplied filename expression.
    if options.is_auto:
        valid_regex = re.compile(r'(\.php|\.asp|\.aspx|\.scath|\.bash|\.zsh|\.csh|\.tsch|\.pl|\.py|\.txt|\.cgi|\.cfm|\.htaccess)$')
    elif len(args) == 2:
        try:
            valid_regex = re.compile(args[1])
        except re.error:
            parser.error("Invalid regular expression")
    else:
        valid_regex = re.compile('.*')

    tests = []
    if options.is_all:
        tests.append(LanguageIC())
        tests.append(Entropy())
        tests.append(LongestWord())
        tests.append(SignatureNasty())
    else:
        if options.is_entropy:
            tests.append(Entropy())
        if options.is_longest:
            tests.append(LongestWord())
        if options.is_ic:
            tests.append(LanguageIC())
        if options.is_signature:
            tests.append(SignatureNasty())
        if options.is_eval:
            tests.append(UsesEval())
        if options.is_zlib:
            tests.append(Compression())

    # Instantiate the class used for searching, opening, and reading files
    locator = SearchFile()

    # CSV file output array
    csv_array = []
    csv_header = ["filename"]

    # Grab the file and calculate each test against file
    fileCount = 0
    fileIgnoreCount = 0
    for data, filename in locator.search_file_path(args, valid_regex):
        if not data:
            continue

        if options.ignore_unicode:
            asciiHighCount = sum(1 for byte in bytearray(data) if byte > 127)
            fileAsciiHighRatio = float(asciiHighCount) / float(len(data))
            if fileAsciiHighRatio >= .1:
                fileIgnoreCount = fileIgnoreCount + 1
                continue

        # a row array for the CSV
        csv_row = [filename]
        for test in tests:
            calculated_value = test.calculate(data, filename)
            # Make the header row if it hasn't been fully populated, +1 here
            # to account for filename column
            if len(csv_header) < len(tests) + 1:
                csv_header.append(test.__class__.__name__)
            csv_row.append(calculated_value)
        fileCount = fileCount + 1
        csv_array.append(csv_row)

    if options.is_csv:
        csv_array.insert(0, csv_header)
        with _open_csv(options.is_csv) as handle:
            csv.writer(handle).writerows(csv_array)

    timeFinish = _timer()

    # Print some stats
    print("\n[[ Total files scanned: %i ]]" % (fileCount))
    print("[[ Total files ignored: %i ]]" % (fileIgnoreCount))
    print("[[ Scan Time: %f seconds ]]" % (timeFinish - timeStart))

    # Print top rank lists
    rank_list = {}
    for test in tests:
        test.sort()
        test.printer(10)
        for entry in test.results:
            rank_list[entry["filename"]] = rank_list.get(entry["filename"], 0) + entry["rank"]

    rank_sorted = sorted(rank_list.items(), key=lambda item: item[1])

    print("\n[[ Top cumulative ranked files ]]")
    for name, total in rank_sorted[:10]:
        print(' {0:>7} {1}'.format(total, name))


if __name__ == "__main__":
    main()