1 # Copyright (C) 2006-2009, Parrot Foundation.
# Load the PGE bytecode library; presumably this is what makes the
# "PGE::Perl6Regex" compiler requested below available -- TODO confirm.
5 load_bytecode "PGE.pbc"
# Named registers used by the rest of the code: PMC handles (compiler, compiled
# rule, match results, lookup tables, iterator), string buffers, and integer
# lengths/counters.
6 .local pmc p6rule_compile, rulesub, match, variants, variants_p5, iub, it, matches, capt
7 .local string pattern, chunk, seq, key, replacement
8 .local int readlen, chunklen, seqlen, finallen, i, varnum, count
# Fetch the compiler registered under the name "PGE::Perl6Regex". It is invoked
# later with a pattern string, and the result is kept in rulesub.
9 p6rule_compile = compreg "PGE::Perl6Regex"
11 # Store the regexes we need...
# Each entry is handed to p6rule_compile later on, so it is written in the
# Perl 6 rule spelling (<[...]> where the Perl 5 table below has [...]).
12 variants = new 'FixedStringArray'
# NOTE(review): entry 0 is written with a leading and a trailing space. The
# matching variants_p5 key repeats them exactly, which is required because the
# lookup uses the variants string itself as the hash key. Presumably the
# spaces are insignificant to the Perl 6 rule compiler -- TODO confirm.
14 variants[0] = ' agggtaaa|tttaccct '
15 variants[1] = '<[cgt]>gggtaaa|tttaccc<[acg]>'
16 variants[2] = 'a<[act]>ggtaaa|tttacc<[agt]>t'
17 variants[3] = 'ag<[act]>gtaaa|tttac<[agt]>ct'
18 variants[4] = 'agg<[act]>taaa|ttta<[agt]>cct'
19 variants[5] = 'aggg<[acg]>aaa|ttt<[cgt]>ccct'
20 variants[6] = 'agggt<[cgt]>aa|tt<[acg]>accct'
21 variants[7] = 'agggta<[cgt]>a|t<[acg]>taccct'
22 variants[8] = 'agggtaa<[cgt]>|<[acg]>ttaccct'
23 # and store the p5regex style for printing
# Keys are the exact strings stored in variants above; values are the same
# patterns in Perl 5 spelling. Keep the two tables in sync when editing either.
24 variants_p5 = new 'Hash'
25 variants_p5[' agggtaaa|tttaccct '] = 'agggtaaa|tttaccct'
26 variants_p5['<[cgt]>gggtaaa|tttaccc<[acg]>'] = '[cgt]gggtaaa|tttaccc[acg]'
27 variants_p5['a<[act]>ggtaaa|tttacc<[agt]>t'] = 'a[act]ggtaaa|tttacc[agt]t'
28 variants_p5['ag<[act]>gtaaa|tttac<[agt]>ct'] = 'ag[act]gtaaa|tttac[agt]ct'
29 variants_p5['agg<[act]>taaa|ttta<[agt]>cct'] = 'agg[act]taaa|ttta[agt]cct'
30 variants_p5['aggg<[acg]>aaa|ttt<[cgt]>ccct'] = 'aggg[acg]aaa|ttt[cgt]ccct'
31 variants_p5['agggt<[cgt]>aa|tt<[acg]>accct'] = 'agggt[cgt]aa|tt[acg]accct'
32 variants_p5['agggta<[cgt]>a|t<[acg]>taccct'] = 'agggta[cgt]a|t[acg]taccct'
33 variants_p5['agggtaa<[cgt]>|<[acg]>ttaccct'] = 'agggtaa[cgt]|[acg]ttaccct'
# iub maps a key to the text that replaces it in the final pass over seq: the
# key is compiled as a rule and iub[key] is spliced in as the replacement.
# Here the single letter 'n' is mapped to a four-way alternation.
41 iub['n'] = '(a|c|g|t)'
# Disabled alternative table kept for reference: the same kind of mapping, but
# keyed by a longer rule that captures the letter instead of the bare letter.
47 # seems faster with the other method...
48 # and this was the only regex I could get to work
49 # iub['[ <-[b]>*: (b) ]*'] = '(c|g|t)'
50 # iub['[ <-[d]>*: (d) ]*'] = '(a|g|t)'
51 # iub['[ <-[h]>*: (h) ]*'] = '(a|c|t)'
52 # iub['[ <-[k]>*: (k) ]*'] = '(g|t)'
53 # iub['[ <-[m]>*: (m) ]*'] = '(a|c)'
54 # iub['[ <-[n]>*: (n) ]*'] = '(a|c|g|t)'
55 # iub['[ <-[r]>*: (r) ]*'] = '(a|g)'
56 # iub['[ <-[s]>*: (s) ]*'] = '(c|g)'
57 # iub['[ <-[v]>*: (v) ]*'] = '(a|c|g)'
58 # iub['[ <-[w]>*: (w) ]*'] = '(a|t)'
59 # iub['[ <-[y]>*: (y) ]*'] = '(c|t)'
61 ############################################
# Measure chunk (filled by code not shown here) and branch to the endwhile
# label when its length is zero.
65 chunklen = length chunk
66 unless chunklen goto endwhile
67 # They don't say you have to match case insensitive...
74 #############################################
# Rule with two capturing alternatives, repeated: a run starting with '>'
# followed by non-newline characters, or non-newline characters followed by a
# captured newline.
76 pattern = '[ ( [ \> \N*: ] ) | \N*:(\n) ]*'
77 rulesub = p6rule_compile(pattern)
# When capt is false, branch to endstripfind.
82 unless capt goto endstripfind
# Four-operand substr with an empty replacement: removes the span of seq
# described by $I0 and $I1 (presumably offset and length, set by code not
# shown -- TODO confirm).
87 substr seq, $I0, $I1, ''
92 ###########################################
# Number of entries in the variants table; it bounds the index i below.
94 varnum = elements variants
# Leave via endfor once i is no longer below varnum.
98 unless i < varnum goto endfor
# Current pattern in Perl 6 rule spelling.
100 pattern = variants[i]
101 # The spec says to print the p5 style regex, shame on them
# Look up the Perl 5 spelling, using the Perl 6 pattern text as the hash key.
102 $S0 = variants_p5[pattern]
105 # And out of spite, use p6 rules anyway
106 rulesub = p6rule_compile(pattern)
# When match is false, branch to the next label.
110 unless match goto next
121 #####################################################
122 # Final replace to make the sequence a p5 style regex
# Presumably this include defines the .ITERATE_FROM_START constant used on the
# next line -- TODO confirm.
123 .include "iterator.pasm"
# Position the iterator at its start. The creation of 'it' is not shown here;
# presumably it walks the keys of iub -- TODO confirm.
125 set it, .ITERATE_FROM_START
# Fresh growable array; its truth value is tested further down.
126 matches = new 'ResizablePMCArray'
# When the iterator is false, branch to iter_end.
128 unless it goto iter_end
# Replacement text for the current key (key is assigned by code not shown).
130 replacement = iub[key]
131 # Ok, using a regex to match a single fixed character is probably excessive
132 # But it's what's wanted...
# The key itself is compiled as the rule to search with.
133 rulesub = p6rule_compile(key)
138 # unless capt goto endswitchfind
143 # substr seq, $I0, $I1, replacement
147 ##########################################
# When match is false, branch to endswitch.
149 unless match goto endswitch
# NOTE(review): how $P0 is filled and used is not visible in this excerpt.
153 $P0 = new 'FixedIntegerArray'
# When matches is false, branch to endswitchloop.
163 unless matches goto endswitchloop
# Four-operand substr: overwrite the span of seq described by $I0 and $I1
# (presumably offset and length -- TODO confirm) with the replacement text.
167 substr seq, $I0, $I1, replacement
170 #############################################
# Length of seq after the replacements above.
173 finallen = length seq
188 # vim: expandtab shiftwidth=4 ft=pir: