[cage] Add some karma aliases for myself
[parrot.git] / tools / dev / parrot-fuzzer
blob52dbcb5f7e2f78e8c35bf61f4956aa5639f98062
1 #!/usr/bin/env python
2 # Copyright (C) 2009, Parrot Foundation
3 # $Id$
5 from fusil.application import Application
6 from fusil.process.watch import WatchProcess
7 from fusil.process.create import CreateProcess
8 from fusil.process.stdout import WatchStdout
9 from fusil.project_agent import ProjectAgent
10 from fusil.process.tools import locateProgram
11 from fusil.write_code import WriteCode
12 from optparse import OptionGroup
13 import re
14 import string
15 import random
17 '''
19 =head1 Name
21 parrot-fuzzer
23 =head1 Description
25 This is a fuzzer for Parrot, written in Python using the fusil library. It
26 attempts to break Parrot by generating calls to random PIR opcodes.
28 =head1 Dependencies
30 This script requires Python 2.5+ to run. The fusil
31 L<http://fusil.hachoir.org/trac> and python-ptrace
32 L<http://python-ptrace.hachoir.org/trac> libraries are also required.
34 =head1 Running
36 Short version: C<sudo ./tools/dev/parrot-fuzzer>
38 C<parrot-fuzzer> is run like any other fusil-based fuzzer. Fusil likes to be
39 run as root. This is so that the child process in which Parrot runs can be put in
40 a more restricted environment, limiting potential damage.
42 fusil assumes the existence of a C<fusil> user and group. Parrot runs as this
43 user/group as part of its restricted environment. Passing C<--unsafe> allows
44 it to run as the current user. Although it is not likely that this will cause
45 any damage to your system, it is possible.
47 C<parrot-fuzzer> needs access to Parrot's source code in order to figure out
48 which PMCs and ops are available. It assumes that it's running in the root dir
49 of Parrot's source code. You can use a different dir via
50 C<--parrot_root=/some/other/path>.
52 =head1 Options
54 =over 4
56 =item C<--parrot_root=/path/to/parrot>
58 Specify the path to the root of Parrot's source dir. By default, this is the
59 current dir.
61 =item C<--runcore=--some-runcore>
63 Specify which runcore to use when running Parrot. The default is the C<slow>
64 core. This option corresponds directly to Parrot's C<--runcore> option. Other
65 runcores include C<fast>, C<jit>, C<cgoto>, C<cgp>, C<cgp-jit>, and C<switch>.
66 See Parrot's help for more details.
68 =item C<--ignore_blacklist>
70 Some PMCs and opcodes are known to cause false positives or results of limited
71 value. These are blacklisted by default. Using C<--ignore_blacklist> causes
72 the fuzzer to use all available PMCs and opcodes, even those known to behave
73 badly during testing.
75 =item C<--instructions=10>
77 Generate this number of instructions during test run. The default is 3. Note
78 that a larger number such as 20 does not necessarily result in more failures.
80 =back
82 =head1 License
84 This program is distributed under the same license as Parrot itself.
86 =cut
88 '''
class ParrotFuzzer(Application):
    """Fusil application that fuzzes Parrot with randomly generated PIR code."""

    #base name of the dir where temp files and successful results will be stored
    NAME="parrot-fuzz"

    def createFuzzerOptions(self, parser):
        """Build and return the "Parrot fuzzer" option group for `parser`.

        The group carries --parrot_root, --runcore, --instructions and
        --ignore_blacklist, added in that order.
        """
        group = OptionGroup(parser, "Parrot fuzzer")
        option_specs = (
            ("--parrot_root", dict(
                help="Parrot program path (default: .)",
                type="str",
                default=".")),
            ("--runcore", dict(
                help="Run Parrot with the specified runcore (default: --slow-core)",
                type="str",
                default="--slow-core")),
            ("--instructions", dict(
                help="Generate this many instructions per test run (default: 3)",
                type="int",
                default="3")),
            ("--ignore_blacklist", dict(
                help="Use opcodes and PMCs known to cause bad or questionable results (default: use blacklists)",
                action="store_true",
                default=False)),
        )
        for flag, settings in option_specs:
            group.add_option(flag, **settings)
        return group

    def setupProject(self):
        """Create the Parrot process, the PIR generator and the process watchers."""
        opts = self.options
        parrot_bin = locateProgram(opts.parrot_root + "/parrot")
        # "<fuzzy.pir>" is a placeholder; ParrotProcess.on_pir_source swaps in
        # the real file name once a PIR file has been generated.
        command = [parrot_bin, opts.runcore, "<fuzzy.pir>"]
        parrot_process = ParrotProcess(self.project, command)
        # The generator is not used further here; presumably it registers
        # itself with the project on construction (confirm against fusil).
        generator = PirGenerator(self.project, opts)
        WatchProcess(parrot_process)
        WatchStdout(parrot_process)
class PirGenerator(ProjectAgent, WriteCode):
    """Project agent that writes a PIR file of random opfunc calls per session."""

    def __init__(self, project, options):
        """Read the fuzzer options and load the opfunc and PMC lists.

        project -- the fusil project this agent belongs to
        options -- parsed options providing parrot_root, instructions and
                   ignore_blacklist
        """
        root = options.parrot_root
        use_everything = options.ignore_blacklist
        self.parrot_root = root
        self.instruction_count = options.instructions
        self.ignore_blacklist = use_everything
        self.opfunc_gen = OpfuncGenerator()
        self.arg_gen = ArgGenerator(root, use_everything)

        self.opfunc_gen.populateOpfuncList(root, use_everything)

        ProjectAgent.__init__(self, project, "pir_source")
        WriteCode.__init__(self)

    def generatePir(self, filename):
        """Write a freshly generated PIR program to `filename`.

        The program consists of a preamble (exception handler plus argument
        setup), a body with instruction_count random opfunc calls, and a
        postamble holding the catch-all label.
        """
        arg_gen = self.arg_gen

        self.pir_preamble = """
.sub main
$P0 = new ['ExceptionHandler']
set_addr $P0, catchall
push_eh $P0 #pokemon: gotta catch 'em all
"""
        self.pir_postamble = """
catchall:
#Don't do anything with exceptions: we're hoping for a segfault or similar.
.end
"""
        #how many instructions to generate
        #Strangely, a low number like 3 seems to generate slightly more faults
        #than a high number like 20.
        wanted = self.instruction_count
        self.pir_body = " #generating "+str(wanted)+" instructions\n"

        self.createFile(filename)

        #pick some opfuncs
        calls = [OpfuncCall(*self.opfunc_gen.getOpfunc()) for _ in range(wanted)]

        #tell the arg generator how many args of each type will be needed
        for kind in ('s', 'p', 'i', 'n', 'sc', 'ic', 'nc'):
            needed = sum(call.getArgCount(kind) for call in calls)
            arg_gen.setArgCount(kind, needed)

        #generate the args, adding any supporting code to the preamble
        arg_makers = (
            arg_gen.generateStringArgs,
            arg_gen.generatePMCArgs,
            arg_gen.generateIntArgs,
            arg_gen.generateNumArgs,
            arg_gen.generateStringConstArgs,
            arg_gen.generateIntConstArgs,
            arg_gen.generateNumConstArgs,
        )
        for make_args in arg_makers:
            self.pir_preamble += make_args()

        #put the args into the opfunc calls and collect the call text
        for call in calls:
            for position in range(call.getTotalArgCount()):
                value = arg_gen.getArgVal(call.getArgType(position))
                call.setArgVal(position, value)
            self.pir_body += call.getOpfuncCall()

        #write the code
        for section in (self.pir_preamble, self.pir_body, self.pir_postamble):
            self.write(0, section)
        self.close()

    def on_session_start(self):
        """Generate this session's fuzzy.pir and send its name as 'pir_source'."""
        pir_path = self.session().createFilename('fuzzy.pir')
        self.generatePir(pir_path)
        self.send('pir_source', pir_path)
209 #Representation of a call to an opfunc, including values of arguments
210 #Note that arguments are literal, e.g. '$P0', '"foo"', etc
class OpfuncCall:
    """A single call to an opfunc, including the values of its arguments.

    Argument values are literal PIR text, e.g. '$P0' or '"foo"'.
    """

    def __init__(self, name, sig):
        """Create a call to opfunc `name` with signature `sig`.

        name -- short opfunc name, e.g. 'add'
        sig  -- underscore-separated argument types, e.g. 'i_i_ic'; the empty
                string means the opfunc takes no arguments
        """
        self.name = name
        if sig == '':
            # ''.split('_') would yield [''], so the no-argument case is
            # handled explicitly.
            self.long_name = name
            self.arg_types = []
        else:
            self.long_name = name + '_' + sig
            self.arg_types = sig.split("_")
        # Every argument starts out as an empty placeholder until setArgVal
        # fills it in.
        self.arg_vals = [''] * len(self.arg_types)
        self.total_arg_count = len(self.arg_types)

    def getLongName(self):
        """Return the full opfunc name (short name plus signature)."""
        return self.long_name

    def getArgCount(self, arg):
        """Return how many arguments of type `arg` this call takes."""
        return self.arg_types.count(arg)

    def getTotalArgCount(self):
        """Return the total number of arguments this call takes."""
        return self.total_arg_count

    def getArgType(self, n):
        """Return the type of the n-th argument (0-based)."""
        return self.arg_types[n]

    def setArgVal(self, n, arg_val):
        """Set the literal PIR text used for the n-th argument (0-based)."""
        self.arg_vals[n] = arg_val

    def getOpfuncCall(self):
        """Return the PIR text for this call.

        The text is a comment line naming the full opfunc, followed by the
        instruction with its comma-separated arguments.
        """
        opfunc_call = '\n #'+self.long_name+'\n ' + self.name
        if self.arg_vals:
            opfunc_call += ' ' + ', '.join(self.arg_vals)
        return opfunc_call + "\n"
class ArgGenerator:
    """Generates random argument values for opfunc calls.

    Register types ('s', 'p', 'i', 'n') produce preamble code that
    initialises $S/$P/$I/$N registers; constant types ('sc', 'ic', 'nc')
    produce literal values and no preamble code.  After a generate* method
    has run, getArgVal hands out a random value of that type.
    """

    # Magnitude classes a random number is drawn from.
    _MAGNITUDES = ['neg_many', 'neg_one', 'zero', 'pos_one', 'pos_many']
    #negative numbers and zero mess up control flow-related ops
    _POSITIVE_MAGNITUDES = ['pos_one', 'pos_many']

    def __init__(self, parrot_root, ignore_blacklist):
        """Load the list of PMC types found under `parrot_root`.

        parrot_root      -- path to the root of Parrot's source dir
        ignore_blacklist -- when true, blacklisted PMC types are used too
        """
        # Kept per instance: as class attributes these dicts would be shared
        # (and mutated) by every ArgGenerator.
        self.arg_counts = {}
        self.args = {}
        self.pmc_gen = PMCTypeGenerator()
        self.pmc_gen.populatePMCList(parrot_root, ignore_blacklist)

    def setArgCount(self, arg_type, count):
        """Record that `count` values of `arg_type` must be generated."""
        self.arg_counts[arg_type] = count

    def getArgVal(self, arg_type):
        """Return a random previously generated value of `arg_type`.

        Raises KeyError if no values of that type were generated and
        IndexError if zero were generated.
        """
        return random.choice(self.args[arg_type])

    def _fillRegisters(self, arg_type, sigil, makeInitializer):
        """Create one register per needed arg; return the PIR that sets them.

        sigil is the register prefix (e.g. '$S'); makeInitializer returns the
        PIR text assigned to each register.
        """
        names = []
        lines = []
        for n in range(self.arg_counts[arg_type]):
            register = sigil + str(n)
            lines.append(" " + register + " = " + makeInitializer() + "\n")
            names.append(register)
        self.args[arg_type] = names
        return "".join(lines)

    def _fillConstants(self, arg_type, makeLiteral):
        """Create one literal per needed arg; constants need no preamble."""
        self.args[arg_type] = [makeLiteral()
                               for _ in range(self.arg_counts[arg_type])]
        return ""

    def _randomInt(self, magnitudes):
        """Return a random int from one of the given magnitude classes."""
        kind = random.choice(magnitudes)
        if kind == 'neg_many':
            return random.randint(-999999, -2)
        if kind == 'pos_many':
            return random.randint(2, 999999)
        return {'neg_one': -1, 'zero': 0, 'pos_one': 1}[kind]

    def _randomNum(self):
        """Return a random float from any magnitude class."""
        kind = random.choice(self._MAGNITUDES)
        if kind == 'neg_many':
            return (random.random() * -999999) - 1
        if kind == 'pos_many':
            return (random.random() * 999999) + 1
        return {'neg_one': -1.0, 'zero': 0.0, 'pos_one': 1.0}[kind]

    def generateStringArgs(self):
        """Generate the 's' args; return PIR initialising the $S registers."""
        return self._fillRegisters(
            's', '$S', lambda: "\"" + self.getString() + "\"")

    def generatePMCArgs(self):
        """Generate the 'p' args; return PIR creating a random PMC per $P."""
        return self._fillRegisters(
            'p', '$P', lambda: "new ['" + self.pmc_gen.getPMCType() + "']")

    def generateIntArgs(self):
        """Generate the 'i' args; return PIR initialising the $I registers."""
        return self._fillRegisters(
            'i', '$I', lambda: str(self._randomInt(self._MAGNITUDES)))

    def generateNumArgs(self):
        """Generate the 'n' args; return PIR initialising the $N registers."""
        return self._fillRegisters(
            'n', '$N', lambda: str(self._randomNum()))

    def generateStringConstArgs(self):
        """Generate the 'sc' args as quoted literals; return an empty preamble."""
        return self._fillConstants(
            'sc', lambda: '"' + self.getString() + '"')

    def generateIntConstArgs(self):
        """Generate the 'ic' args as positive int literals; return ''."""
        return self._fillConstants(
            'ic', lambda: str(self._randomInt(self._POSITIVE_MAGNITUDES)))

    def generateNumConstArgs(self):
        """Generate the 'nc' args as float literals; return ''."""
        return self._fillConstants(
            'nc', lambda: str(self._randomNum()))

    def getString(self):
        """Return a random string of 0 to 10 characters for a PIR literal.

        Double quotes and backslashes are backslash-escaped, and newlines and
        carriage returns are dropped.  Other whitespace is emitted unchanged.
        """
        # printable already contains punctuation and whitespace; adding them
        # again makes those characters more likely to be picked.
        chars = string.printable + string.punctuation + string.whitespace
        replacements = {'"': '\\"', '\\': '\\\\', '\n': '', '\r': ''}
        pieces = []
        for _ in range(random.randint(0, 10)):
            char = random.choice(chars)
            pieces.append(replacements.get(char, char))
        return ''.join(pieces)
class PMCTypeGenerator:
    """Supplies random PMC type names read from Parrot's lib/Parrot/PMC.pm."""

    # PMC types that are skipped unless the blacklist is ignored.
    pmc_blacklist = [
        'Packfile',
        'PackfileAnnotation',
        'PackfileAnnotationKeys',
        'PackfileAnnotations',
        'PackfileConstantTable',
        'PackfileDirectory',
        'PackfileFixupEntry',
        'PackfileFixupTable',
        'PackfileRawSegment',
        'PackfileSegment',
    ]

    def __init__(self):
        # Kept per instance: as a class attribute the list would be shared by
        # every generator and grow with each populatePMCList call.
        self.pmc_list = []

    def populatePMCList(self, parrot_root, ignore_blacklist):
        """Append the PMC type names found under `parrot_root` to pmc_list.

        parrot_root      -- path to the root of Parrot's source dir
        ignore_blacklist -- when true, blacklisted types are included too

        Raises IOError (OSError on Python 3) if PMC.pm cannot be opened.
        """
        pmc_pm = parrot_root + "/lib/Parrot/PMC.pm"
        pmc_f = open(pmc_pm, 'r')
        # try/finally rather than `with`: the script documents Python 2.5
        # support, where `with` needs a __future__ import.
        try:
            for line in pmc_f:
                # Entries look like "<TAB>Name => 42,".  Capturing the name
                # directly also handles a last line with no trailing newline.
                found = re.search('\t([a-zA-Z]+) => [0-9]+,', line)
                if not found:
                    continue
                pmc_type = found.group(1)
                if ignore_blacklist or pmc_type not in self.pmc_blacklist:
                    self.pmc_list.append(pmc_type)
        finally:
            pmc_f.close()

    def getPMCType(self):
        """Return a random PMC type name; raises IndexError if none are loaded."""
        return random.choice(self.pmc_list)
class OpfuncGenerator:
    """Supplies random opfuncs read from Parrot's generated src/ops/core_ops.c."""

    # Opfuncs that are skipped unless the blacklist is ignored.
    opfunc_blacklist = [
        'check_events', #only for testing
        'check_events__', #not for direct use
        'clears', #clearing all [SPIN] registers isn't useful
        'clearp',
        'cleari',
        'clearn',
        'cpu_ret',
        'debug',
        'debug_break',
        'debug_init',
        'debug_load',
        'debug_print',
        'die',
        'enternative',
        'exit',
        'gc_debug',
        'if',
        'pic_callr__',
        'pic_get_params__',
        'pic_infix__',
        'pic_inline_sub__',
        'pic_set_returns__',
        'pin',
        'pop_eh',
        'prederef__',
        'profile',
        'push_eh',
        'returncc',
        'rethrow',
        'runinterp',
        'setn_ind',
        'sets_ind',
        'seti_ind',
        'setp_ind',
        'sleep',
        'tailcall',
        'trace',
        'trap',
        'unless',
        'unpin',
        'wrapper__',
        'yield',
    ]

    def __init__(self):
        # Kept per instance: as a class attribute the list would be shared by
        # every generator and grow with each populateOpfuncList call.
        self.opfunc_list = []

    def _quotedName(self, line):
        """Return the text between the double quotes on a generated C line."""
        #strip leading space and opening double-quote
        name = re.sub('[ ]+"', '', line)
        #strip everything after closing double-quote
        return re.sub('".*\n', '', name)

    def populateOpfuncList(self, parrot_root, ignore_blacklist):
        """Append [short_name, signature] pairs for the usable opfuncs.

        parrot_root      -- path to the root of Parrot's source dir
        ignore_blacklist -- when true, blacklisted opfuncs are included too

        Raises IOError (OSError on Python 3) if core_ops.c cannot be opened.
        """
        ops_h = parrot_root + "/src/ops/core_ops.c"
        ops_f = open(ops_h, 'r')
        # try/finally rather than `with`: the script documents Python 2.5
        # support, where `with` needs a __future__ import.
        try:
            lines = ops_f.readlines()
        finally:
            ops_f.close()

        #This is a moderately fragile hack that relies on the specific
        #format of some generated code. Expect breakage.
        pos = 0
        total = len(lines)
        while pos < total:
            line = lines[pos]
            pos += 1
            if 'PARROT_INLINE_OP' not in line and 'PARROT_FUNCTION_OP' not in line:
                continue
            # The two lines after the marker hold the short and long names.
            # A file that ends before them is treated as truncated, not as
            # an error.
            if pos + 1 >= total:
                break
            short_name = self._quotedName(lines[pos])
            long_name = self._quotedName(lines[pos + 1])
            pos += 2

            if long_name == short_name:
                sig = ''
            else:
                sig = long_name.replace(short_name + '_', '')

            #XXX: don't know how to handle these args
            if re.search('(pc|k|ki|kc|kic)', sig):
                continue
            if ignore_blacklist or short_name not in self.opfunc_blacklist:
                self.opfunc_list.append([short_name, sig])

    def getOpfunc(self):
        """Return a random [short_name, signature] pair.

        Raises IndexError if no opfuncs have been loaded.
        """
        return random.choice(self.opfunc_list)
class ParrotProcess(CreateProcess):
    """Parrot child process that is started once a PIR file is available."""

    def on_pir_source(self, filename):
        """Run Parrot on the PIR file `filename`.

        Presumably called by fusil when PirGenerator sends its 'pir_source'
        event (confirm against the fusil documentation).
        """
        # The PIR file is the last command-line argument.  Index 1 would hit
        # the runcore option if fusil keeps the program name at index 0, so
        # the last slot is replaced; it is the same slot either way.
        self.cmdline.arguments[-1] = filename
        self.createProcess()
# Script entry point: build the fuzzer application and run it when this file
# is executed directly and not imported.
if __name__ == "__main__":
    ParrotFuzzer().main()