sync w/ main trunk
[bioperl-live.git] / Bio / Matrix / PSM / PsmHeader.pm
blob9b5faef95047405e1ebe11f8b145f79ed371610d
1 # $Id$
3 =head1 NAME
5 Bio::Matrix::PSM::PsmHeader - PSM mast parser implementation
7 =head1 SYNOPSIS
9 # See Bio::Matrix::PSM::IO for detailed documentation on how to use
10 # PSM parsers
12 =head1 DESCRIPTION
14 Parser for mast. This driver unlike meme or transfac for example is
15 dedicated more to PSM sequence matches
17 =head1 FEEDBACK
19 =head2 Mailing Lists
21 User feedback is an integral part of the evolution of this and other
22 Bioperl modules. Send your comments and suggestions preferably to one
23 of the Bioperl mailing lists. Your participation is much appreciated.
25 bioperl-l@bioperl.org - General discussion
26 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
28 =head2 Support
30 Please direct usage questions or support issues to the mailing list:
32 L<bioperl-l@bioperl.org>
34 rather than to the module maintainer directly. Many experienced and
35 reponsive experts will be able look at the problem and quickly
36 address it. Please include a thorough description of the problem
37 with code and data examples if at all possible.
39 =head2 Reporting Bugs
41 Report bugs to the Bioperl bug tracking system to help us keep track
42 the bugs and their resolution. Bug reports can be submitted via the
43 web:
45 http://bugzilla.open-bio.org/
47 =head1 AUTHOR - Stefan Kirov
49 Email skirov@utk.edu
51 =head1 APPENDIX
53 =cut
56 # Let the code begin...
57 package Bio::Matrix::PSM::PsmHeader;
59 use Bio::Matrix::PSM::InstanceSite;
61 use strict;
62 use base qw(Bio::Root::Root Bio::Matrix::PSM::PsmHeaderI);
64 #These define what structures within the
65 @Bio::Matrix::PSM::PsmHeader::MASTHEADER=qw(html version release seq hid
66 length instances unstructured);
67 @Bio::Matrix::PSM::PsmHeader::MEMEHEADER=qw(html version release hid weight length unstructured);
68 @Bio::Matrix::PSM::PsmHeader::TRANSFACHEADER=qw(unstructured version release);
69 @Bio::Matrix::PSM::PsmHeader::PSIBLASTHEADER=qw(seq width ic);
70 @Bio::Matrix::PSM::PsmHeader::ALLHEADER=qw(header release type version html
71 release weight length id
72 seq instances unstructured);
74 =head2 new
76 Title : new
77 Usage : my $header= Bio::Matrix::PSM::PsmHeader->new(-seq=>\%seq,
78 -mid=>\%mid,
79 -width=>\%width,
80 -instances=>\%instances,
81 -header=>\@header,
82 -type=>'mast');
83 Function: Creates a new Bio::Matrix::PSM::PsmHeader object
84 Throws :
85 Example :
86 Returns : Bio::Matrix::PSM::PsmHeader object
87 Args : hash
90 =cut
92 sub new {
93 my ($class,@args)=@_;
94 my $self = $class->SUPER::new(@args);
95 return $self;
98 #parse version/release info here from the unstructured array
99 sub _initialize {
100 my $self = shift;
101 my $type=ref($self);
102 $type=~s/\w+:://g;
103 $self->{_type} = $type;
104 my $dat=join(" ",grep(/version|release/i,@{$self->{unstructured}}));
105 if ($dat && ($dat=~/version\b/i)) {
106 $self->{version}=substr($dat,$+[0]+1);
107 $self->{version}=~s/\s.+[^\d\.\:\/]//g;
108 $self->{version}=~s/^\D//;
110 if ($dat && ($dat=~/release\b/i)) {
111 my $rel=substr($dat,$+[0]+1);
112 $rel=~s/[^\d\.\:\/\-]//g;
113 $rel=~s/^\D//;
114 if ($rel=~/\d\d:\d\d:\d\d/) { #Reformat if time is available too
115 my $time=substr($rel,$-[0]+1);
116 my $dat= substr($rel,0,$-[0]);
117 $self->{release}="$dat $time";
119 else { $self->{release}=$rel; }
121 return $self;
124 =head2 seq
126 Title : seq
127 Usage : my %seq= $header->seq();
128 Function: Returns the sequence data as a hash, indexed by a sequence ID (motif id or accession number)
129 In case the input data is a motif it would return the consenus seq for each of them (mast).
130 Throws :
131 Example :
132 Returns : hash
133 Args :
136 =cut
138 sub seq {
139 my $self = shift;
140 return () unless ($self->_check('seq'));
141 return %{$self->{seq}};
144 =head2 hid
146 Title : hid
147 Usage : my @hid= $header->hid();
148 Function: Returns array with the motif ids
149 Throws :
150 Example :
151 Returns : array
152 Args :
155 =cut
157 sub hid {
158 my $self = shift;
159 return unless ($self->_check('hid'));
160 my @header=@{$self->{hid}};
161 return @header;
164 =head2 length
166 Title : length
167 Usage : my %length= $header->length();
168 Function: Returns the length of the input sequence or motifs as a hash, indexed
169 by a sequence ID (motif id or accession number)
170 Throws :
171 Example :
172 Returns : hash
173 Args :
176 =cut
178 sub length {
179 my $self = shift;
180 return unless ($self->_check('length'));
181 return $self->{length};
184 =head2 instances
186 Title : instances
187 Usage : my %instances= $header->instances();
188 Function: Returns the info about the input data, contained in the header
189 Throws :
190 Example :
191 Returns : hash
192 Args :
195 =cut
197 sub instances {
198 my $self = shift;
199 return unless ($self->_check('instances'));
200 return %{$self->{instances}};
203 =head2 weight
205 Title : weight
206 Usage : my %weights= $header->weight();
207 Function: Returns the weights of the input sequence as a hash, indexed
208 by a sequence ID
209 Throws :
210 Example :
211 Returns : hash
212 Args :
215 =cut
217 sub weight {
218 my $self = shift;
219 return () unless ($self->_check('weight'));
220 return %{$self->{weight}};
224 =head2 unstuctured
226 Title : unstuctured
227 Usage : my @unstructured= $header->unstuctured();
228 Function: Returns the unstructured data in the header as an array, one line per
229 array element, all control symbols are removed with \W
230 Throws :
231 Example :
232 Returns : array
233 Args :
236 =cut
238 sub unstructured {
239 my $self = shift;
240 return @{$self->{unstructured}};
243 =head2 version
245 Title : version
246 Usage : my $version= $header->version;
247 Function: Returns the version of the file being parsed if such exists
248 Throws :
249 Example :
250 Returns : string
251 Args :
254 =cut
256 sub version {
257 my $self = shift;
258 return $self->{version};
261 =head2 release
263 Title : release
264 Usage : my $release= $header->release;
265 Function: Returns the release of the file being parsed if such exists
266 Throws :
267 Example :
268 Returns : string
269 Args :
272 =cut
274 sub release {
275 my $self = shift;
276 return $self->{release};
279 =head2 _check
281 Title : _check
282 Usage : if ($self->_check('weights') { #do something} else {return 0;}
283 Function: Checks if the method called is aplicable to the file format
284 Throws :
285 Example :
286 Returns : boolean
287 Args : string
290 =cut
292 sub _check {
293 my ($self,$method) = @_;
294 my $type= $self->{'_type'};
295 if ($type eq 'meme') {
296 return 0 unless (grep(/$method/,
297 @Bio::Matrix::PSM::PsmHeader::MEMEHEADER));
298 } elsif ($type eq 'mast') {
299 return 0 unless (grep(/$method/,
300 @Bio::Matrix::PSM::PsmHeader::MASTHEADER));
301 } elsif ($type eq 'transfac') {
302 return 0 unless (grep(/$method/,
303 @Bio::Matrix::PSM::PsmHeader::TRANSFACHEADER));
304 } elsif ($type eq 'psiblast') {
305 return 0 unless (grep(/$method/,
306 @Bio::Matrix::PSM::PsmHeader::PSIBLASTHEADER));
308 return 1;