2 # BioPerl module for Bio::AlignIO::mega
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Jason Stajich <jason-at-bioperl.org>
8 # Copyright Jason Stajich
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
16 Bio::AlignIO::mega - Parse and Create MEGA format data files
21 my $alignio = Bio::AlignIO->new(-format => 'mega',
22 -file => 't/data/hemoglobinA.meg');
24 while( my $aln = $alignio->next_aln ) {
25 # process each alignment or convert to another format like NEXUS
30 This object handles reading and writing data streams in the MEGA
31 format (Kumar and Nei).
38 User feedback is an integral part of the evolution of this and other
39 Bioperl modules. Send your comments and suggestions preferably to
40 the Bioperl mailing list. Your participation is much appreciated.
42 bioperl-l@bioperl.org - General discussion
43 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
47 Please direct usage questions or support issues to the mailing list:
49 I<bioperl-l@bioperl.org>
51 rather than to the module maintainer directly. Many experienced and
52 responsive experts will be able to look at the problem and quickly
53 address it. Please include a thorough description of the problem
54 with code and data examples if at all possible.
58 Report bugs to the Bioperl bug tracking system to help us keep track
59 of the bugs and their resolution. Bug reports can be submitted via the web:
62 https://github.com/bioperl/bioperl-live/issues
64 =head1 AUTHOR - Jason Stajich
66 Email jason-at-bioperl.org
70 The rest of the documentation details each of the object methods.
71 Internal methods are usually preceded with a _
76 # Let the code begin...
79 package Bio
::AlignIO
::mega
;
80 use vars
qw($MEGANAMELEN %VALID_TYPES $LINELEN $BLOCKLEN);
84 use Bio::LocatableSeq;
86 # symbols are changed due to MEGA's use of '.' for redundant sequences
92 %VALID_TYPES = map {$_, 1} qw( dna rna protein standard);
94 use base
qw(Bio::AlignIO);
100 Usage : $aln = $stream->next_aln()
101 Function: returns the next alignment in the stream.
102 Supports the following MEGA format features:
103 - The file has to start with '#mega'
104 - Reads in the name of the alignment from a comment
105 (anything after '!TITLE: ') .
106 - Reads in the format parameters datatype
108 Returns : L<Bio::Align::AlignI> object - returns 0 on end of file
118 my ($alphabet,%seqs);
119 local $Bio::LocatableSeq
::OTHER_SYMBOLS
= '\*\?\.';
120 local $Bio::LocatableSeq
::GAP_SYMBOLS
= '\-';
121 my $aln = Bio
::SimpleAlign
->new(-source
=> 'mega');
123 while( defined($entry = $self->_readline()) && ($entry =~ /^\s+$/) ) {}
125 $self->throw("Not a valid MEGA file! [#mega] not starting the file!")
126 unless $entry =~ /^#mega/i;
128 while( defined($entry = $self->_readline() ) ) {
130 if(/\!Title:\s*([^\;]+)\s*/i) { $aln->id($1)}
131 elsif( s/\!Format\s+([^\;]+)\s*/$1/ ) {
132 my (@fields) = split(/\s+/,$1);
133 foreach my $f ( @fields ) {
134 my ($name,$value) = split(/\=/,$f);
135 if( $name eq 'datatype' ) {
137 } elsif( $name eq 'identical' ) {
138 $aln->match_char($value);
139 } elsif( $name eq 'indel' ) {
140 $aln->gap_char($value);
148 while( defined($entry) ) {
149 if( $entry !~ /^\s+$/ ) {
150 # this is to skip the leading '#'
151 my $seqname = substr($entry,1,$MEGANAMELEN-1);
152 $seqname =~ s/(\S+)\s+$/$1/g;
153 my $line = substr($entry,$MEGANAMELEN);
155 if( ! defined $seqs{$seqname} ) {push @order, $seqname; }
156 $seqs{$seqname} .= $line;
158 $entry = $self->_readline();
161 foreach my $seqname ( @order ) {
162 my $s = $seqs{$seqname};
163 $s =~ s/[$Bio::LocatableSeq::GAP_SYMBOLS]+//g;
164 my $end = length($s);
165 my $seq = Bio
::LocatableSeq
->new('-alphabet' => $alphabet,
166 '-display_id' => $seqname,
167 '-seq' => $seqs{$seqname},
174 return $aln if $aln->num_sequences;
181 Usage : $stream->write_aln(@aln)
182 Function: writes the $aln object into the stream in MEGA format
183 Returns : 1 for success and 0 for error
184 Args : L<Bio::Align::AlignI> object
189 my ($self,@aln) = @_;
194 foreach my $aln ( @aln ) {
195 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
196 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
198 } elsif( ! $aln->is_flush($self->verbose) ) {
199 $self->warn("All Sequences in the alignment must be the same length");
203 my $len = $aln->length();
204 my $format = sprintf('datatype=%s identical=%s indel=%s;',
205 $aln->get_seq_by_pos(1)->alphabet(),
206 $aln->match_char, $aln->gap_char);
208 $self->_print(sprintf("#mega\n!Title: %s;\n!Format %s\n\n\n",
211 my ($count, $blockcount,$length) = ( 0,0,$aln->length());
212 $aln->set_displayname_flat();
213 while( $count < $length ) {
214 foreach my $seq ( $aln->each_seq ) {
215 my $seqchars = $seq->seq();
217 my $substring = substr($seqchars, $count, $LINELEN);
219 while( $blockcount < length($substring) ) {
220 push @blocks, substr($substring, $blockcount,$BLOCKLEN);
221 $blockcount += $BLOCKLEN;
223 $self->_print(sprintf("#%-".($MEGANAMELEN-1)."s%s\n",
224 substr($aln->displayname($seq->get_nse()),
226 join(' ', @blocks)));
232 $self->flush if $self->_flush_on_write && defined $self->_fh;