tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / t / data / test.meme2
blobcd5c24d180fc5ff5fe6f648aedf8ae215a0ae5e8
1 ********************************************************************************
2 MEME - Motif discovery tool
3 ********************************************************************************
4 MEME version 3.0 (Release date: 2002/04/02 00:11:59)
6 For further information on how to interpret these results or to get
7 a copy of the MEME software please access http://meme.sdsc.edu.
9 This file may be used as input to the MAST algorithm for searching
10 sequence databases for matches to groups of motifs.  MAST is available
11 for interactive use and downloading at http://meme.sdsc.edu.
12 ********************************************************************************
15 ********************************************************************************
16 REFERENCE
17 ********************************************************************************
18 If you use this program in your research, please cite:
20 Timothy L. Bailey and Charles Elkan,
21 "Fitting a mixture model by expectation maximization to discover
22 motifs in biopolymers", Proceedings of the Second International
23 Conference on Intelligent Systems for Molecular Biology, pp. 28-36,
24 AAAI Press, Menlo Park, California, 1994.
25 ********************************************************************************
28 ********************************************************************************
29 TRAINING SET
30 ********************************************************************************
31 DATAFILE= test.fasta
32 ALPHABET= ACGT
33 Sequence name            Weight Length  Sequence name            Weight Length  
34 -------------            ------ ------  -------------            ------ ------  
35 68723                    1.0000   2000  16939                    1.0000   2001  
36 20754                    1.0000   2001  6707                     1.0000   2000  
37 20755                    1.0000   2000  6700                     1.0000   2002  
38 20760                    1.0000   2000  20761                    1.0000   2000  
39 20762                    1.0000   2000  
40 ********************************************************************************
42 ********************************************************************************
43 COMMAND LINE SUMMARY
44 ********************************************************************************
45 This information can also be useful in the event you wish to report a
46 problem with the MEME software.
48 command: meme test.fasta -dna -nostatus -nmotifs 2 -minsites 8 -maxw 20 -revcomp 
50 model:  mod=         zoops    nmotifs=         2    evt=           inf
51 object function=  E-value of product of p-values
52 width:  minw=            8    maxw=           20    minic=        0.00
53 width:  wg=             11    ws=              1    endgaps=       yes
54 nsites: minsites=        8    maxsites=        9    wnsites=       0.8
55 theta:  prob=            1    spmap=         uni    spfuzz=        0.5
56 em:     prior=   dirichlet    b=            0.01    maxiter=        50
57         distance=    1e-05
58 data:   n=           18004    N=               9
59 strands: + -
60 sample: seed=            0    seqfrac=         1
61 Letter frequencies in dataset:
62 A 0.295 C 0.205 G 0.205 T 0.295 
63 Background letter frequencies (from dataset with add-one prior applied):
64 A 0.295 C 0.205 G 0.205 T 0.295 
65 ********************************************************************************
68 ********************************************************************************
69 MOTIF  1        width =   20   sites =   8   llr = 147   E-value = 1.3e-002
70 ********************************************************************************
71 --------------------------------------------------------------------------------
72         Motif 1 Description
73 --------------------------------------------------------------------------------
74 Simplified        A  ::a1::931:6:348:1::1
75 pos.-specific     C  aa:::8:11841:331:139
76 probability       G  :::9::::63::84::3:8:
77 matrix            T  ::::a3161::9:::969::
79          bits    2.3 **                  
80                  2.1 **                  
81                  1.8 *** *               
82                  1.6 *****              *
83 Information      1.4 ******   * **  * ***
84 content          1.1 *******  * ** ** ***
85 (26.5 bits)      0.9 *******  **** ** ***
86                  0.7 ******* ***** ******
87                  0.5 ********************
88                  0.2 ********************
89                  0.0 --------------------
91 Multilevel           CCAGTCATGCATGAATTTGC
92 consensus                 T A GC AGC G C 
93 sequence                          C      
94                                          
95 --------------------------------------------------------------------------------
97 --------------------------------------------------------------------------------
98         Motif 1 sites sorted by position p-value
99 --------------------------------------------------------------------------------
100 Sequence name            Strand  Start   P-value                    Site      
101 -------------            ------  ----- ---------            --------------------
102 20761                        +   1879  6.50e-13 TCTGATTAAG CCAGTCATGCATGGATTTGC ATTTTGGTTG
103 20760                        +   1875  6.50e-13 CCCAGTCACG CCAGTCATGCATGGATTTGC ATTTTGATTG
104 6700                         +   1100  2.27e-10 CCTGCTCATG CCAGTCATGGATAAATTTGC ATCTGGCTTA
105 20755                        +   1478  5.08e-10 CCCTGTCAGG CCAGTTATGGATGAATGTGC ACTTAANNNN
106 6707                         +   1431  6.11e-09 TCACACAGAT CCAGTCAATCCTGCCTGTCC ATCTCAATGA
107 20762                        +   1878  1.89e-08 CCTGGTTAGG CCAGTTAAACACAGATTTGC ATTTTGGTTA
108 16939                        -    914  2.01e-08 ACTTTTCCTT CCAATCATGCCTGCCCTTGA ACCCTATTGG
109 20754                        +   1175  6.73e-08 GCTCACCTTG CCAGTCTCCCCTGAATACCC TACATGCCCT
110 --------------------------------------------------------------------------------
112 --------------------------------------------------------------------------------
113         Motif 1 block diagrams
114 --------------------------------------------------------------------------------
115 SEQUENCE NAME            POSITION P-VALUE  MOTIF DIAGRAM
116 -------------            ----------------  -------------
117 20761                             6.5e-13  1878_[+1]_102
118 20760                             6.5e-13  1874_[+1]_106
119 6700                              2.3e-10  1099_[+1]_883
120 20755                             5.1e-10  1477_[+1]_503
121 6707                              6.1e-09  1430_[+1]_550
122 20762                             1.9e-08  1877_[+1]_103
123 16939                               2e-08  913_[-1]_1068
124 20754                             6.7e-08  1174_[+1]_807
125 --------------------------------------------------------------------------------
127 --------------------------------------------------------------------------------
128         Motif 1 in BLOCKS format
129 --------------------------------------------------------------------------------
130 BL   MOTIF 1 width=20 seqs=8
131 20761                    ( 1879) CCAGTCATGCATGGATTTGC  1 
132 20760                    ( 1875) CCAGTCATGCATGGATTTGC  1 
133 6700                     ( 1100) CCAGTCATGGATAAATTTGC  1 
134 20755                    ( 1478) CCAGTTATGGATGAATGTGC  1 
135 6707                     ( 1431) CCAGTCAATCCTGCCTGTCC  1 
136 20762                    ( 1878) CCAGTTAAACACAGATTTGC  1 
137 16939                    (  914) CCAATCATGCCTGCCCTTGA  1 
138 20754                    ( 1175) CCAGTCTCCCCTGAATACCC  1 
141 --------------------------------------------------------------------------------
143 --------------------------------------------------------------------------------
144         Motif 1 position-specific scoring matrix
145 --------------------------------------------------------------------------------
146 log-odds matrix: alength= 4 w= 20 n= 17833 bayes= 11.1216 E= 1.3e-002 
147   -965    229   -965   -965 
148   -965    229   -965   -965 
149    176   -965   -965   -965 
150   -124   -965    210   -965 
151   -965   -965   -965    176 
152   -965    187   -965    -24 
153    157   -965   -965   -124 
154    -24    -71   -965    108 
155   -124    -71    161   -124 
156   -965    187     29   -965 
157    108     87   -965   -965 
158   -965    -71   -965    157 
159    -24   -965    187   -965 
160     34     29     87   -965 
161    134     29   -965   -965 
162   -965    -71   -965    157 
163   -124   -965     29    108 
164   -965    -71   -965    157 
165   -965     29    187   -965 
166   -124    210   -965   -965 
167 --------------------------------------------------------------------------------
169 --------------------------------------------------------------------------------
170         Motif 1 position-specific probability matrix
171 --------------------------------------------------------------------------------
172 letter-probability matrix: alength= 4 w= 20 n= 17833 E= 1.3e-002 
173  0.000369  0.999007  0.000255  0.000369 
174  0.000369  0.999007  0.000255  0.000369 
175  0.999120  0.000255  0.000255  0.000369 
176  0.125213  0.000255  0.874163  0.000369 
177  0.000369  0.000255  0.000255  0.999120 
178  0.000369  0.749319  0.000255  0.250057 
179  0.874276  0.000255  0.000255  0.125213 
180  0.250057  0.125099  0.000255  0.624589 
181  0.125213  0.125099  0.624475  0.125213 
182  0.000369  0.749319  0.249943  0.000369 
183  0.624589  0.374787  0.000255  0.000369 
184  0.000369  0.125099  0.000255  0.874276 
185  0.250057  0.000255  0.749319  0.000369 
186  0.374901  0.249943  0.374787  0.000369 
187  0.749432  0.249943  0.000255  0.000369 
188  0.000369  0.125099  0.000255  0.874276 
189  0.125213  0.000255  0.249943  0.624589 
190  0.000369  0.125099  0.000255  0.874276 
191  0.000369  0.249943  0.749319  0.000369 
192  0.125213  0.874163  0.000255  0.000369 
193 --------------------------------------------------------------------------------
199 Time 75.70 secs.
201 ********************************************************************************
204 ********************************************************************************
205 MOTIF  2        width =   15   sites =   8   llr = 117   E-value = 1.2e+003
206 ********************************************************************************
207 --------------------------------------------------------------------------------
208         Motif 2 Description
209 --------------------------------------------------------------------------------
210 Simplified        A  :1a39:::::18:::
211 pos.-specific     C  ::::1:::8a:::::
212 probability       G  96:3::1:::::4::
213 matrix            T  13:5:a9a3:936aa
215          bits    2.3          *     
216                  2.1          *     
217                  1.8   *  * * *   **
218                  1.6 * *  * * *   **
219 Information      1.4 * * ******   **
220 content          1.1 * * *******  **
221 (21.0 bits)      0.9 * * ***********
222                  0.7 *** ***********
223                  0.5 ***************
224                  0.2 ***************
225                  0.0 ---------------
227 Multilevel           GGATATTTCCTATTT
228 consensus             T A    T  TG  
229 sequence                G           
230                                     
231 --------------------------------------------------------------------------------
233 --------------------------------------------------------------------------------
234         Motif 2 sites sorted by position p-value
235 --------------------------------------------------------------------------------
236 Sequence name            Strand  Start   P-value                 Site    
237 -------------            ------  ----- ---------            ---------------
238 20762                        +   1845  2.62e-09 TCCAGGAACA GGATATTTCCTATTT TTGAGAGTCC
239 6700                         +   1068  2.62e-09 TTTCAGAACA GGATATTTCCTATTT TGAGTATCCT
240 20755                        +   1445  2.84e-08 GCCAAGGGTG GGATATTTTCTATTT TGTAGAGTCC
241 20754                        -    664  5.62e-08 TTTCTTAGAA GGAAATTTCCTTGTT CTCTTTCTAT
242 20761                        +    670  1.06e-07 GAAGAAAAAG GAAGATTTCCTAGTT AACAATTCAA
243 68723                        -   1925  5.26e-07 TTGCTTTCTT TGAGATGTCCTAGTT CACTCCTAAA
244 20760                        -    651  5.56e-07 TTTAAACTTG GTAAATTTTCTTTTT CTTCACATTT
245 16939                        -   1616  6.78e-07 TAGTTCAGTT GTATCTTTCCAATTT TGATGTTTGG
246 --------------------------------------------------------------------------------
248 --------------------------------------------------------------------------------
249         Motif 2 block diagrams
250 --------------------------------------------------------------------------------
251 SEQUENCE NAME            POSITION P-VALUE  MOTIF DIAGRAM
252 -------------            ----------------  -------------
253 20762                             2.6e-09  1844_[+2]_141
254 6700                              2.6e-09  1067_[+2]_920
255 20755                             2.8e-08  1444_[+2]_541
256 20754                             5.6e-08  663_[-2]_1323
257 20761                             1.1e-07  669_[+2]_1316
258 68723                             5.3e-07  1924_[-2]_61
259 20760                             5.6e-07  650_[-2]_1335
260 16939                             6.8e-07  1615_[-2]_371
261 --------------------------------------------------------------------------------
263 --------------------------------------------------------------------------------
264         Motif 2 in BLOCKS format
265 --------------------------------------------------------------------------------
266 BL   MOTIF 2 width=15 seqs=8
267 20762                    ( 1845) GGATATTTCCTATTT  1 
268 6700                     ( 1068) GGATATTTCCTATTT  1 
269 20755                    ( 1445) GGATATTTTCTATTT  1 
270 20754                    (  664) GGAAATTTCCTTGTT  1 
271 20761                    (  670) GAAGATTTCCTAGTT  1 
272 68723                    ( 1925) TGAGATGTCCTAGTT  1 
273 20760                    (  651) GTAAATTTTCTTTTT  1 
274 16939                    ( 1616) GTATCTTTCCAATTT  1 
277 --------------------------------------------------------------------------------
279 --------------------------------------------------------------------------------
280         Motif 2 position-specific scoring matrix
281 --------------------------------------------------------------------------------
282 log-odds matrix: alength= 4 w= 15 n= 17878 bayes= 11.1253 E= 1.2e+003 
283   -965   -965    210   -124 
284   -124   -965    161    -24 
285    176   -965   -965   -965 
286    -24   -965     29     76 
287    157    -71   -965   -965 
288   -965   -965   -965    176 
289   -965   -965    -71    157 
290   -965   -965   -965    176 
291   -965    187   -965    -24 
292   -965    229   -965   -965 
293   -124   -965   -965    157 
294    134   -965   -965    -24 
295   -965   -965     87    108 
296   -965   -965   -965    176 
297   -965   -965   -965    176 
298 --------------------------------------------------------------------------------
300 --------------------------------------------------------------------------------
301         Motif 2 position-specific probability matrix
302 --------------------------------------------------------------------------------
303 letter-probability matrix: alength= 4 w= 15 n= 17878 E= 1.2e+003 
304  0.000369  0.000255  0.874163  0.125213 
305  0.125213  0.000255  0.624475  0.250057 
306  0.999120  0.000255  0.000255  0.000369 
307  0.250057  0.000255  0.249943  0.499745 
308  0.874276  0.125099  0.000255  0.000369 
309  0.000369  0.000255  0.000255  0.999120 
310  0.000369  0.000255  0.125099  0.874276 
311  0.000369  0.000255  0.000255  0.999120 
312  0.000369  0.749319  0.000255  0.250057 
313  0.000369  0.999007  0.000255  0.000369 
314  0.125213  0.000255  0.000255  0.874276 
315  0.749432  0.000255  0.000255  0.250057 
316  0.000369  0.000255  0.374787  0.624589 
317  0.000369  0.000255  0.000255  0.999120 
318  0.000369  0.000255  0.000255  0.999120 
319 --------------------------------------------------------------------------------
325 Time 152.20 secs.
327 ********************************************************************************
330 ********************************************************************************
331 SUMMARY OF MOTIFS
332 ********************************************************************************
334 --------------------------------------------------------------------------------
335         Combined block diagrams: non-overlapping sites with p-value < 0.0001
336 --------------------------------------------------------------------------------
337 SEQUENCE NAME            COMBINED P-VALUE  MOTIF DIAGRAM
338 -------------            ----------------  -------------
339 68723                            2.83e-04  473_[-1(8.43e-06)]_[+1(1.38e-05)]_[+1(1.38e-05)]_35_[-1(3.15e-06)]_[+1(1.38e-05)]_9_[-1(7.89e-05)]_63_[-1(2.97e-06)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_26_[+2(4.37e-06)]_46_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_6_[-1(7.97e-06)]_24_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_1_[-2(5.92e-05)]_279_[-1(8.43e-06)]_[+1(1.38e-05)]_[+2(5.60e-05)]_157_[-2(5.26e-07)]_61
340 16939                            3.50e-06  913_[-1(2.01e-08)]_682_[-2(6.78e-07)]_205_[+1(5.80e-05)]_146
341 20754                            1.05e-06  39_[-2(1.78e-05)]_4_[+1(4.78e-06)]_564_[-2(7.42e-05)]_6_[-2(5.62e-08)]_146_[+2(3.98e-05)]_335_[+1(6.73e-08)]_93_[+1(5.54e-05)]_694
342 6707                             2.26e-05  173_[+1(4.46e-05)]_655_[-2(5.60e-05)]_3_[+1(3.06e-06)]_12_[+1(4.78e-06)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.93e-05)]_181_[-1(3.26e-05)]_251_[+1(6.11e-09)]_329_[+1(6.34e-05)]_201
343 20755                            5.26e-09  160_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_3_[+1(6.41e-06)]_219_[-1(4.80e-05)]_962_[+2(2.84e-08)]_18_[+1(5.08e-10)]_6_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_14_[+1(7.97e-06)]_83
344 6700                             2.48e-10  48_[+1(7.97e-06)]_267_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_129_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_3_[+1(4.41e-06)]_280_[+2(2.62e-09)]_17_[+1(2.27e-10)]_101_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+2(7.29e-05)]_[+1(6.41e-06)]_604_[-1(8.43e-06)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_3
345 20760                            1.53e-10  259_[-1(6.20e-05)]_339_[-2(3.17e-05)]_17_[-2(5.56e-07)]_436_[-2(4.06e-05)]_7_[+1(2.95e-07)]_6_[-1(1.12e-05)]_705_[+1(6.50e-13)]_106
346 20761                            3.10e-11  397_[-2(1.10e-06)]_257_[+2(1.06e-07)]_382_[-2(5.60e-05)]_[+2(7.29e-05)]_[-1(3.24e-06)]_[+1(1.38e-05)]_[+1(1.38e-05)]_722_[+1(6.50e-13)]_102
347 20762                            1.72e-08  134_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_[+1(1.38e-05)]_92_[+2(9.22e-05)]_517_[+1(7.97e-06)]_7_[-1(4.78e-06)]_439_[+2(2.62e-09)]_18_[+1(1.89e-08)]_103
348 --------------------------------------------------------------------------------
350 ********************************************************************************
353 ********************************************************************************
354 Stopped because nmotifs = 2 reached.
355 ********************************************************************************
357 CPU: crick
359 ********************************************************************************