empty directory
[bioperl-live.git] / t / data / test.meme
blob1aedbfc75a9cda1f69d3f0f455b0dfb4f7c9ceb5
1 ********************************************************************************
2 MEME - Motif discovery tool
3 ********************************************************************************
4 MEME version 3.0 (Release date: 2001/03/05 14:24:28)
6 For further information on how to interpret these results or to get
7 a copy of the MEME software please access http://meme.sdsc.edu.
9 This file may be used as input to the MAST algorithm for searching
10 sequence databases for matches to groups of motifs.  MAST is available
11 for interactive use and downloading at http://meme.sdsc.edu.
12 ********************************************************************************
15 ********************************************************************************
16 REFERENCE
17 ********************************************************************************
18 If you use this program in your research, please cite:
20 Timothy L. Bailey and Charles Elkan,
21 "Fitting a mixture model by expectation maximization to discover
22 motifs in biopolymers", Proceedings of the Second International
23 Conference on Intelligent Systems for Molecular Biology, pp. 28-36,
24 AAAI Press, Menlo Park, California, 1994.
25 ********************************************************************************
28 ********************************************************************************
29 TRAINING SET
30 ********************************************************************************
31 DATAFILE= D10Mit194.set.genbank.fasta.nref
32 ALPHABET= ACGT
33 Sequence name           Weight Length  Sequence name           Weight Length  
34 -------------           ------ ------  -------------           ------ ------  
35 20218                   1.0000   2000  10657                   1.0000   2000  
36 83796                   1.0000   2000  6603                    1.0000   2000  
37 ********************************************************************************
39 ********************************************************************************
40 COMMAND LINE SUMMARY
41 ********************************************************************************
42 This information can also be useful in the event you wish to report a
43 problem with the MEME software.
45 command: meme D10Mit194.set.genbank.fasta.nref -dna -print_fasta -nmotifs 3 -maxw 25 
47 model:  mod=         zoops    nmotifs=         3    evt=           inf
48 object function=  E-value of product of p-values
49 width:  minw=            8    maxw=           25    minic=        0.00
50 width:  wg=             11    ws=              1    endgaps=       yes
51 nsites: minsites=        2    maxsites=        4    wnsites=       0.8
52 theta:  prob=            1    spmap=         uni    spfuzz=        0.5
53 em:     prior=   dirichlet    b=            0.01    maxiter=        50
54         distance=    1e-05
55 data:   n=            8000    N=               4
56 strands: +
57 sample: seed=            0    seqfrac=         1
58 Letter frequencies in dataset:
59 A 0.255 C 0.236 G 0.252 T 0.257 
60 Background letter frequencies (from dataset with add-one prior applied):
61 A 0.255 C 0.236 G 0.252 T 0.257 
62 ********************************************************************************
65 ********************************************************************************
66 MOTIF  1        width =   25   sites =   4   llr = 106   E-value = 1.2e-002
67 ********************************************************************************
68 --------------------------------------------------------------------------------
69         Motif 1 Description
70 --------------------------------------------------------------------------------
71 Simplified        A  :8:88aaa:53a8:::3:a::3::3
72 pos.-specific     C  a3::3:::::3:::5a8a:8a5aa8
73 probability       G  ::8:::::3:5::::::::3:::::
74 matrix            T  ::33::::85::3a5::::::3:::
76          bits    2.1 *              * *  * ** 
77                  1.9 *    ***   * * * ** * ** 
78                  1.7 *    ***   * * * ** * ** 
79                  1.5 *    ***   * * * ** * ** 
80 Information      1.2 *********  *** ****** ***
81 content          1.0 ********** ********** ***
82 (38.1 bits)      0.8 ********** ********** ***
83                  0.6 ********** **************
84                  0.4 *************************
85                  0.2 *************************
86                  0.0 -------------------------
88 Multilevel           CAGAAAAATAGAATCCCCACCCCCC
89 consensus             CTTC   GTA T T A  G A  A
90 sequence                       C          T   
91                                               
92 --------------------------------------------------------------------------------
94 --------------------------------------------------------------------------------
95         Motif 1 sites sorted by position p-value
96 --------------------------------------------------------------------------------
97 Sequence name             Start   P-value                      Site         
98 -------------             ----- ---------            -------------------------
99 6603                       1311  2.59e-15 GGCGCATTGA CAGAAAAATTGAATTCCCACCCCCC AATGAGGAGG
100 83796                      1284  2.59e-15 GGAGGATTGA CAGAAAAATTGAATTCCCACCCCCC AACGAGGAGG
101 20218                       938  6.34e-12 TTTTTGGTAA CCTTAAAATAAAATCCCCACCACCA CTTTTAAAAA
102 10657                      1685  8.70e-12 GGCCCGCGCG CAGACAAAGACATTCCACAGCTCCC GCCCCCTCCA
103 --------------------------------------------------------------------------------
105 --------------------------------------------------------------------------------
106         Motif 1 block diagrams
107 --------------------------------------------------------------------------------
108 SEQUENCE NAME            POSITION P-VALUE  MOTIF DIAGRAM
109 -------------            ----------------  -------------
110 6603                              2.6e-15  1310_[1]_665
111 83796                             2.6e-15  1283_[1]_692
112 20218                             6.3e-12  937_[1]_1038
113 10657                             8.7e-12  1684_[1]_291
114 --------------------------------------------------------------------------------
116 --------------------------------------------------------------------------------
117         Motif 1 in FASTA format
118 --------------------------------------------------------------------------------
119 >6603                     pos 1311
120 CAGAAAAATTGAATTCCCACCCCCC
121 >83796                    pos 1284
122 CAGAAAAATTGAATTCCCACCCCCC
123 >20218                    pos  938
124 CCTTAAAATAAAATCCCCACCACCA
125 >10657                    pos 1685
126 CAGACAAAGACATTCCACAGCTCCC
127 --------------------------------------------------------------------------------
129 --------------------------------------------------------------------------------
130         Motif 1 position-specific scoring matrix
131 --------------------------------------------------------------------------------
132 log-odds matrix: alength= 4 w= 25 n= 7904 bayes= 11.6849 E= 1.2e-002 
133   -865    208   -865   -865 
134    156      8   -865   -865 
135   -865   -865    157     -4 
136    156   -865   -865     -4 
137    156      8   -865   -865 
138    197   -865   -865   -865 
139    197   -865   -865   -865 
140    197   -865   -865   -865 
141   -865   -865     -1    154 
142     97   -865   -865     96 
143     -3      8     99   -865 
144    197   -865   -865   -865 
145    156   -865   -865     -4 
146   -865   -865   -865    196 
147   -865    108   -865     96 
148   -865    208   -865   -865 
149     -3    166   -865   -865 
150   -865    208   -865   -865 
151    197   -865   -865   -865 
152   -865    166     -1   -865 
153   -865    208   -865   -865 
154     -3    108   -865     -4 
155   -865    208   -865   -865 
156   -865    208   -865   -865 
157     -3    166   -865   -865 
158 --------------------------------------------------------------------------------
160 --------------------------------------------------------------------------------
161         Motif 1 position-specific probability matrix
162 --------------------------------------------------------------------------------
163 letter-probability matrix: alength= 4 w= 25 n= 7904 E= 1.2e-002 
164  0.000635  0.998096  0.000629  0.000640 
165  0.748765  0.249966  0.000629  0.000640 
166  0.000635  0.000589  0.748759  0.250017 
167  0.748765  0.000589  0.000629  0.250017 
168  0.748765  0.249966  0.000629  0.000640 
169  0.998141  0.000589  0.000629  0.000640 
170  0.998141  0.000589  0.000629  0.000640 
171  0.998141  0.000589  0.000629  0.000640 
172  0.000635  0.000589  0.250006  0.748770 
173  0.499388  0.000589  0.000629  0.499393 
174  0.250012  0.249966  0.499382  0.000640 
175  0.998141  0.000589  0.000629  0.000640 
176  0.748765  0.000589  0.000629  0.250017 
177  0.000635  0.000589  0.000629  0.998147 
178  0.000635  0.499343  0.000629  0.499393 
179  0.000635  0.998096  0.000629  0.000640 
180  0.250012  0.748719  0.000629  0.000640 
181  0.000635  0.998096  0.000629  0.000640 
182  0.998141  0.000589  0.000629  0.000640 
183  0.000635  0.748719  0.250006  0.000640 
184  0.000635  0.998096  0.000629  0.000640 
185  0.250012  0.499343  0.000629  0.250017 
186  0.000635  0.998096  0.000629  0.000640 
187  0.000635  0.998096  0.000629  0.000640 
188  0.250012  0.748719  0.000629  0.000640 
189 --------------------------------------------------------------------------------
195 Time 38.44 secs.
197 ********************************************************************************
200 ********************************************************************************
201 MOTIF  2        width =   25   sites =   4   llr = 101   E-value = 4.5e-001
202 ********************************************************************************
203 --------------------------------------------------------------------------------
204         Motif 2 Description
205 --------------------------------------------------------------------------------
206 Simplified        A  ::::a::383:5:3:::a33:83:a
207 pos.-specific     C  a3a8:3a33:a::5::8::::383:
208 probability       G  ::::::::::::::::3:38a::8:
209 matrix            T  :8:3:8:5:8:5a3aa::5::::::
211          bits    2.1 * *   *   *         *    
212                  1.9 * * * *   * * ** *  *   *
213                  1.7 * * * *   * * ** *  *   *
214                  1.5 * * * *   * * ** *  *   *
215 Information      1.2 ******* *** * **** ******
216 content          1.0 ******* ***** **** ******
217 (36.6 bits)      0.8 ******* ***** **** ******
218                  0.6 ******* ********** ******
219                  0.4 *************************
220                  0.2 *************************
221                  0.0 -------------------------
223 Multilevel           CTCCATCTATCATCTTCATGGACGA
224 consensus             C T C ACA T A  G AA CAC 
225 sequence                    C     T    G      
226                                               
227 --------------------------------------------------------------------------------
229 --------------------------------------------------------------------------------
230         Motif 2 sites sorted by position p-value
231 --------------------------------------------------------------------------------
232 Sequence name             Start   P-value                      Site         
233 -------------             ----- ---------            -------------------------
234 6603                       1000  1.62e-15 CGGGAACATG CTCCATCTATCATCTTCATGGACGA AATCGACTCC
235 83796                       978  4.69e-15 CGAGAACATG CTCCATCCATCATCTTCATGGACGA GATTGACTCT
236 20218                      1545  1.69e-11 TAGCTTCTCT CCCCATCAATCTTATTCAGAGCCCA CCCCTCCCCC
237 10657                      1075  3.40e-11 AGGATCTGGT CTCTACCTCACTTTTTGAAGGAAGA AACACTTAAT
238 --------------------------------------------------------------------------------
240 --------------------------------------------------------------------------------
241         Motif 2 block diagrams
242 --------------------------------------------------------------------------------
243 SEQUENCE NAME            POSITION P-VALUE  MOTIF DIAGRAM
244 -------------            ----------------  -------------
245 6603                              1.6e-15  999_[2]_976
246 83796                             4.7e-15  977_[2]_998
247 20218                             1.7e-11  1544_[2]_431
248 10657                             3.4e-11  1074_[2]_901
249 --------------------------------------------------------------------------------
251 --------------------------------------------------------------------------------
252         Motif 2 in FASTA format
253 --------------------------------------------------------------------------------
254 >6603                     pos 1000
255 CTCCATCTATCATCTTCATGGACGA
256 >83796                    pos  978
257 CTCCATCCATCATCTTCATGGACGA
258 >20218                    pos 1545
259 CCCCATCAATCTTATTCAGAGCCCA
260 >10657                    pos 1075
261 CTCTACCTCACTTTTTGAAGGAAGA
262 --------------------------------------------------------------------------------
264 --------------------------------------------------------------------------------
265         Motif 2 position-specific scoring matrix
266 --------------------------------------------------------------------------------
267 log-odds matrix: alength= 4 w= 25 n= 7904 bayes= 10.9476 E= 4.5e-001 
268   -865    208   -865   -865 
269   -865      8   -865    154 
270   -865    208   -865   -865 
271   -865    166   -865     -4 
272    197   -865   -865   -865 
273   -865      8   -865    154 
274   -865    208   -865   -865 
275     -3      8   -865     96 
276    156      8   -865   -865 
277     -3   -865   -865    154 
278   -865    208   -865   -865 
279     97   -865   -865     96 
280   -865   -865   -865    196 
281     -3    108   -865     -4 
282   -865   -865   -865    196 
283   -865   -865   -865    196 
284   -865    166     -1   -865 
285    197   -865   -865   -865 
286     -3   -865     -1     96 
287     -3   -865    157   -865 
288   -865   -865    198   -865 
289    156      8   -865   -865 
290     -3    166   -865   -865 
291   -865      8    157   -865 
292    197   -865   -865   -865 
293 --------------------------------------------------------------------------------
295 --------------------------------------------------------------------------------
296         Motif 2 position-specific probability matrix
297 --------------------------------------------------------------------------------
298 letter-probability matrix: alength= 4 w= 25 n= 7904 E= 4.5e-001 
299  0.000635  0.998096  0.000629  0.000640 
300  0.000635  0.249966  0.000629  0.748770 
301  0.000635  0.998096  0.000629  0.000640 
302  0.000635  0.748719  0.000629  0.250017 
303  0.998141  0.000589  0.000629  0.000640 
304  0.000635  0.249966  0.000629  0.748770 
305  0.000635  0.998096  0.000629  0.000640 
306  0.250012  0.249966  0.000629  0.499393 
307  0.748765  0.249966  0.000629  0.000640 
308  0.250012  0.000589  0.000629  0.748770 
309  0.000635  0.998096  0.000629  0.000640 
310  0.499388  0.000589  0.000629  0.499393 
311  0.000635  0.000589  0.000629  0.998147 
312  0.250012  0.499343  0.000629  0.250017 
313  0.000635  0.000589  0.000629  0.998147 
314  0.000635  0.000589  0.000629  0.998147 
315  0.000635  0.748719  0.250006  0.000640 
316  0.998141  0.000589  0.000629  0.000640 
317  0.250012  0.000589  0.250006  0.499393 
318  0.250012  0.000589  0.748759  0.000640 
319  0.000635  0.000589  0.998135  0.000640 
320  0.748765  0.249966  0.000629  0.000640 
321  0.250012  0.748719  0.000629  0.000640 
322  0.000635  0.249966  0.748759  0.000640 
323  0.998141  0.000589  0.000629  0.000640 
324 --------------------------------------------------------------------------------
330 Time 78.29 secs.
332 ********************************************************************************
335 ********************************************************************************
336 MOTIF  3        width =   21   sites =   4   llr = 88   E-value = 4.8e-001
337 ********************************************************************************
338 --------------------------------------------------------------------------------
339         Motif 3 Description
340 --------------------------------------------------------------------------------
341 Simplified        A  :::58:::::3::::33::::
342 pos.-specific     C  8:833::8:8:a:8a8:a5a8
343 probability       G  3::::::3:::::3:::::::
344 matrix            T  :a33:aa:a38:a:::8:5:3
346          bits    2.1            *  *  * * 
347                  1.9  *   ** *  ** *  * * 
348                  1.7  *   ** *  ** *  * * 
349                  1.5  *   ** *  ** *  * * 
350 Information      1.2 *** ************** **
351 content          1.0 *** *****************
352 (31.8 bits)      0.8 *** *****************
353                  0.6 *** *****************
354                  0.4 *********************
355                  0.2 *********************
356                  0.0 ---------------------
358 Multilevel           CTCAATTCTCTCTCCCTCCCC
359 consensus            G TCC  G TA  G AA T T
360 sequence                T                 
361                                           
362 --------------------------------------------------------------------------------
364 --------------------------------------------------------------------------------
365         Motif 3 sites sorted by position p-value
366 --------------------------------------------------------------------------------
367 Sequence name             Start   P-value                    Site       
368 -------------             ----- ---------            ---------------------
369 10657                      1511  1.45e-13 CCCAGGCGGT CTCAATTCTCTCTCCCTCCCC TTTCCGTGAC
370 83796                      1801  7.40e-12 TGTATATGCA CTCTCTTCTCTCTCCCTCTCC AGGTCATGCA
371 6603                       1811  1.22e-10 GTAACTTAAT GTTCATTCTCTCTCCCACCCC TAGGTCATGC
372 20218                       606  7.57e-10 CCCAGGCCAG CTCAATTGTTACTGCATCTCT AGGATTGGAA
373 --------------------------------------------------------------------------------
375 --------------------------------------------------------------------------------
376         Motif 3 block diagrams
377 --------------------------------------------------------------------------------
378 SEQUENCE NAME            POSITION P-VALUE  MOTIF DIAGRAM
379 -------------            ----------------  -------------
380 10657                             1.5e-13  1510_[3]_469
381 83796                             7.4e-12  1800_[3]_179
382 6603                              1.2e-10  1810_[3]_169
383 20218                             7.6e-10  605_[3]_1374
384 --------------------------------------------------------------------------------
386 --------------------------------------------------------------------------------
387         Motif 3 in FASTA format
388 --------------------------------------------------------------------------------
389 >10657                    pos 1511
390 CTCAATTCTCTCTCCCTCCCC
391 >83796                    pos 1801
392 CTCTCTTCTCTCTCCCTCTCC
393 >6603                     pos 1811
394 GTTCATTCTCTCTCCCACCCC
395 >20218                    pos  606
396 CTCAATTGTTACTGCATCTCT
397 --------------------------------------------------------------------------------
399 --------------------------------------------------------------------------------
400         Motif 3 position-specific scoring matrix
401 --------------------------------------------------------------------------------
402 log-odds matrix: alength= 4 w= 21 n= 7920 bayes= 10.9506 E= 4.8e-001 
403   -865    166     -1   -865 
404   -865   -865   -865    196 
405   -865    166   -865     -4 
406     97      8   -865     -4 
407    156      8   -865   -865 
408   -865   -865   -865    196 
409   -865   -865   -865    196 
410   -865    166     -1   -865 
411   -865   -865   -865    196 
412   -865    166   -865     -4 
413     -3   -865   -865    154 
414   -865    208   -865   -865 
415   -865   -865   -865    196 
416   -865    166     -1   -865 
417   -865    208   -865   -865 
418     -3    166   -865   -865 
419     -3   -865   -865    154 
420   -865    208   -865   -865 
421   -865    108   -865     96 
422   -865    208   -865   -865 
423   -865    166   -865     -4 
424 --------------------------------------------------------------------------------
426 --------------------------------------------------------------------------------
427         Motif 3 position-specific probability matrix
428 --------------------------------------------------------------------------------
429 letter-probability matrix: alength= 4 w= 21 n= 7920 E= 4.8e-001 
430  0.000635  0.748719  0.250006  0.000640 
431  0.000635  0.000589  0.000629  0.998147 
432  0.000635  0.748719  0.000629  0.250017 
433  0.499388  0.249966  0.000629  0.250017 
434  0.748765  0.249966  0.000629  0.000640 
435  0.000635  0.000589  0.000629  0.998147 
436  0.000635  0.000589  0.000629  0.998147 
437  0.000635  0.748719  0.250006  0.000640 
438  0.000635  0.000589  0.000629  0.998147 
439  0.000635  0.748719  0.000629  0.250017 
440  0.250012  0.000589  0.000629  0.748770 
441  0.000635  0.998096  0.000629  0.000640 
442  0.000635  0.000589  0.000629  0.998147 
443  0.000635  0.748719  0.250006  0.000640 
444  0.000635  0.998096  0.000629  0.000640 
445  0.250012  0.748719  0.000629  0.000640 
446  0.250012  0.000589  0.000629  0.748770 
447  0.000635  0.998096  0.000629  0.000640 
448  0.000635  0.499343  0.000629  0.499393 
449  0.000635  0.998096  0.000629  0.000640 
450  0.000635  0.748719  0.000629  0.250017 
451 --------------------------------------------------------------------------------
457 Time 117.82 secs.
459 ********************************************************************************
462 ********************************************************************************
463 SUMMARY OF MOTIFS
464 ********************************************************************************
466 --------------------------------------------------------------------------------
467         Combined block diagrams: non-overlapping sites with p-value < 0.0001
468 --------------------------------------------------------------------------------
469 SEQUENCE NAME            COMBINED P-VALUE  MOTIF DIAGRAM
470 -------------            ----------------  -------------
471 20218                            7.77e-19  605_[3(7.57e-10)]_311_[1(6.34e-12)]_582_[2(1.69e-11)]_431
472 10657                            5.46e-22  1_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_7_[1(6.00e-08)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_5_[1(1.18e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_4_[1(1.29e-07)]_332_[2(3.40e-11)]_383_[3(7.75e-07)]_7_[3(1.45e-13)]_56_[3(3.47e-05)]_76_[1(8.70e-12)]_291
473 83796                            1.73e-27  977_[2(4.69e-15)]_281_[1(2.59e-15)]_492_[3(7.40e-12)]_179
474 6603                             9.32e-27  597_[1(1.75e-07)]_[1(1.75e-07)]_[1(1.75e-07)]_327_[2(1.62e-15)]_286_[1(2.59e-15)]_475_[3(1.22e-10)]_169
475 --------------------------------------------------------------------------------
477 ********************************************************************************
480 ********************************************************************************
481 Stopped because nmotifs = 3 reached.
482 ********************************************************************************
484 CPU: hydra-1.lsd.ornl.gov
486 ********************************************************************************