sync with trunk (to r15946)
[bioperl-live.git] / Bio / AlignIO / maf.pm
blob541bcc610bb69a7efec24040beaefe514e4403f6
1 # $Id$
3 # BioPerl module for Bio::AlignIO::maf
5 # Copyright Allen Day
8 =head1 NAME
10 Bio::AlignIO::maf - Multiple Alignment Format sequence input stream
12 =head1 SYNOPSIS
14 Do not use this module directly. Use it via the Bio::AlignIO class.
16 use Bio::AlignIO;
18 my $alignio = Bio::AlignIO->new(-fh => \*STDIN, -format => 'maf');
20 while(my $aln = $alignio->next_aln()){
21 my $match_line = $aln->match_line;
23 print $aln, "\n";
25 print $aln->length, "\n";
26 print $aln->num_residues, "\n";
27 print $aln->is_flush, "\n";
28 print $aln->num_sequences, "\n";
30 $aln->splice_by_seq_pos(1);
32 print $aln->consensus_string(60), "\n";
33 print $aln->get_seq_by_pos(1)->seq, "\n";
34 print $aln->match_line(), "\n";
36 print "\n";
39 =head1 DESCRIPTION
41 This class constructs Bio::SimpleAlign objects from an MAF-format
42 multiple alignment file.
44 Writing in MAF format is currently unimplemented.
46 Spec of MAF format is here:
47 http://genome.ucsc.edu/FAQ/FAQformat
49 =head1 FEEDBACK
51 =head2 Support
53 Please direct usage questions or support issues to the mailing list:
55 L<bioperl-l@bioperl.org>
57 rather than to the module maintainer directly. Many experienced and
58 reponsive experts will be able look at the problem and quickly
59 address it. Please include a thorough description of the problem
60 with code and data examples if at all possible.
62 =head2 Reporting Bugs
64 Report bugs to the Bioperl bug tracking system to help us keep track
65 the bugs and their resolution. Bug reports can be submitted via the
66 web:
68 http://bugzilla.open-bio.org/
70 =head1 AUTHORS - Allen Day
72 Email: allenday@ucla.edu
74 =head1 APPENDIX
76 The rest of the documentation details each of the object
77 methods. Internal methods are usually preceded with a _
79 =cut
81 # Let the code begin...
83 package Bio::AlignIO::maf;
84 use strict;
86 use Bio::SimpleAlign;
88 use base qw(Bio::AlignIO);
90 =head2 new
92 Title : new
93 Usage : my $alignio = Bio::AlignIO->new(-format => 'maf'
94 -file => '>file',
95 -idlength => 10,
96 -idlinebreak => 1);
97 Function: Initialize a new L<Bio::AlignIO::maf> reader
98 Returns : L<Bio::AlignIO> object
99 Args :
101 =cut
103 sub _initialize {
104 my($self,@args) = @_;
105 $self->SUPER::_initialize(@args);
110 =head2 next_aln
112 Title : next_aln
113 Usage : $aln = $stream->next_aln()
114 Function: returns the next alignment in the stream.
115 Throws an exception if trying to read in PHYLIP
116 sequential format.
117 Returns : L<Bio::SimpleAlign> object
118 Args :
120 =cut
122 sub next_aln {
123 my $self = shift;
125 # check beginning of file for proper header
126 if(!$self->{seen_header}){
127 my $line = $self->_readline;
128 $self->throw("This doesn't look like a MAF file. First line should start with ##maf, but it was: ".$line)
129 unless $line =~ /^##maf/;
130 $self->{seen_header} = 1;
131 # keep in case we parse this later
132 $self->_pushback($line);
135 my $aln = Bio::SimpleAlign->new(-source => 'maf');
137 my($aline, @slines, $seen_aline);
138 while(my $line = $self->_readline()){
139 if ($line =~ /^a\s/xms) {
140 # next block?
141 if ($seen_aline) {
142 $self->_pushback($line);
143 last;
145 $aline = $line;
146 $seen_aline++;
147 } elsif ($line =~ /^s\s/xms) {
148 push @slines, $line;
149 } else {
150 # missed lines
151 $self->debug($line);
155 # all MAF starts with 'a' line
156 return unless $aline;
158 my($kvs) = $aline =~ /^a\s+(.+)$/;
159 my @kvs = split /\s+/, $kvs if $kvs;
160 my %kv;
161 foreach my $kv (@kvs){
162 my($k,$v) = $kv =~ /(.+)=(.+)/;
163 $kv{$k} = $v;
166 $aln->score($kv{score});
168 foreach my $sline (@slines){
169 my($s,$src,$start,$size,$strand,$srcsize,$text) =
170 split /\s+/, $sline;
171 # adjust coordinates to be one-based inclusive
172 $start = $start + 1;
173 my $seq = Bio::LocatableSeq->new('-seq' => $text,
174 '-id' => $src,
175 '-start' => $start,
176 '-end' => $start + $size - 1,
177 '-strand' => $strand,
179 $aln->add_seq($seq);
182 return $aln if $aln->num_sequences;
183 return;
186 sub write_aln {
187 shift->throw_not_implemented