tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / SeqFeature / Tools / FeatureNamer.pm
blob8aaf790bf2e8b9660be8826c38e0cdb766750cef
1 # $Id$
3 # bioperl module for Bio::SeqFeature::Tools::FeatureNamer
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Chris Mungall <cjm@fruitfly.org>
9 # Copyright Chris Mungall
11 # You may distribute this module under the same terms as perl itself
13 # POD documentation - main docs before the code
15 =head1 NAME
17 Bio::SeqFeature::Tools::FeatureNamer - generates unique persistent names for features
19 =head1 SYNOPSIS
21 use Bio::SeqIO;
22 use Bio::SeqFeature::Tools::FeatureNamer;
24 # first fetch a genbank SeqI object
25 $seqio =
26 Bio::SeqIO->new(-file=>'AE003644.gbk',
27 -format=>'GenBank');
28 $seq = $seqio->next_seq();
30 $namer = Bio::SeqFeature::Tools::FeatureNamer->new;
31 my @features = $seq->get_SeqFeatures;
32 foreach my $feature (@features) {
33 $namer->name_feature($feature) unless $feature->display_name;
36 =head1 DESCRIPTION
38 This is a helper class for providing names for SeqFeatures
40 The L<Bio::SeqFeatureI> class provides a display_name
41 method. Typically the display_name is not set when parsing formats
42 such as genbank - instead properties such as B<label>, B<product> or
43 B<gene> are set in a somewhat inconsistent manner.
45 In addition, when generating subfeatures (for example, exons that are
46 subfeatures of a transcript feature), it is often desirable to name
47 these subfeatures before either exporting to another format or
48 reporting to the user.
50 This module is intended to help give uniform display_names to
51 features and their subfeatures.
53 =head1 TODO
55 Currently the naming policy is hardcoded. It may be desirable to allow
56 plugging in variations on naming policies; this could be done either
57 by subclassing, anonymous subroutines (closures) or
58 parameterization. Contact the author if you feel you have need for a
59 different naming policy
62 =head1 FEEDBACK
64 =head2 Mailing Lists
66 User feedback is an integral part of the evolution of this and other
67 Bioperl modules. Send your comments and suggestions preferably to the
68 Bioperl mailing lists. Your participation is much appreciated.
70 bioperl-l@bioperl.org - General discussion
71 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
73 =head2 Support
75 Please direct usage questions or support issues to the mailing list:
77 I<bioperl-l@bioperl.org>
79 rather than to the module maintainer directly. Many experienced and
80 responsive experts will be able to look at the problem and quickly
81 address it. Please include a thorough description of the problem
82 with code and data examples if at all possible.
84 =head2 Reporting Bugs
86 Report bugs to the Bioperl bug tracking system to help us keep track
87 of the bugs and their resolution. Bug reports can be submitted via the
88 web:
90 http://bugzilla.open-bio.org/
92 =head1 AUTHOR - Chris Mungall
94 Email: cjm AT fruitfly DOT org
96 =head1 APPENDIX
98 The rest of the documentation details each of the object
99 methods. Internal methods are usually preceded with a _
101 =cut
104 # Let the code begin...
106 package Bio::SeqFeature::Tools::FeatureNamer;
107 use strict;
109 # Object preamble - inherits from Bio::Root::Root
111 use base qw(Bio::Root::Root);
113 =head2 new
115 Title : new
116 Usage : $namer = Bio::SeqFeature::Tools::FeatureNamer->new();
117 Function: constructor
118 Example :
119 Returns : a new Bio::SeqFeature::Tools::FeatureNamer
120 Args : see below
123 =cut
# Constructor: hands every argument to the parent class constructor
# (via SUPER::new) and returns the resulting object unchanged.
#
# Args    : any arguments accepted by the parent constructor
# Returns : the object produced by the parent constructor
sub new {
    my $class = shift;

    # Scalar assignment keeps the parent constructor in scalar context.
    my $self = $class->SUPER::new(@_);

    # NOTE: an optional TYPEMAP argument (read with _rearrange and stored
    # via $self->typemap) was sketched here at one point but is disabled.

    return $self;
}
138 =head2 name_feature
140 Title : name_feature
141 Usage : $namer->name_feature($sf);
142 Function: sets display_name
143 Example :
144 Returns :
145 Args : L<Bio::SeqFeatureI>
147 This method calls generate_feature_name() and uses the returned value
148 to set the display_name of the feature
150 =cut
# Generate a name for the given feature and store it as its display_name.
#
# Args    : $sf - feature object providing display_name()
# Returns : whatever the feature's display_name() setter returns
sub name_feature {
    my $self    = shift;
    my $feature = shift;

    # Name derivation is delegated so subclasses can override the policy.
    my $generated = $self->generate_feature_name($feature);

    return $feature->display_name($generated);
}
158 =head2 name_contained_features
160 Title : name_contained_features
161 Usage : $namer->name_contained_features($sf);
162 Function: sets display_name for all features contained by sf
163 Example :
164 Returns :
165 Args : L<Bio::SeqFeatureI>
167 iterates through all subfeatures of a certain feature (using
168 get_all_SeqFeatures) and names each subfeature, based on the
169 generated name for the holder feature
171 A subfeature is named by concatenating the generated name of the
172 container feature with the type and a number.
174 For example, if the containing feature is a gene with display name
175 B<dpp>, subfeatures will be named dpp-mRNA-1 dpp-mRNA-2 dpp-exon-1
176 dpp-exon-2 etc
178 =cut
# Assign a display_name to every feature returned by the container's
# get_all_SeqFeatures(). Each name has the form
# "<container name>-<primary_tag>-<n>", where <n> counts, starting at 1,
# how many features with that primary_tag have been seen so far.
#
# Args    : $sf - container feature providing get_all_SeqFeatures()
# Returns : nothing (empty list / undef)
sub name_contained_features {
    my ($self, $sf) = @_;

    # The container's generated name is the common prefix for all names.
    my $prefix = $self->generate_feature_name($sf);

    my %seen_count;
    for my $child ($sf->get_all_SeqFeatures) {
        my $kind  = $child->primary_tag;
        my $index = ++$seen_count{$kind};    # per-type running counter
        $child->display_name("$prefix-$kind-$index");
    }

    return;
}
195 =head2 generate_feature_name
197 Title : generate_feature_name
198 Usage : $name = $namer->generate_feature_name($sf);
199 Function: derives a sensible human readable name for a $sf
200 Example :
201 Returns : str
202 Args : L<Bio::SeqFeatureI>
204 returns a generated name (but does not actually set display_name).
206 If display_name is already set, the method will return this
208 Otherwise, the name will depend on the property:
210 =over
212 =item label
214 =item product
216 =item gene
218 =item locus_tag
220 =back
222 (in order of priority; gene and locus_tag are only used when the feature's primary_tag is 'gene')
224 =cut
# Derive a human-readable name for a feature without modifying it.
#
# If the feature's display_name is already set (true), it is returned.
# Otherwise the first value of the first matching tag below is used:
#
#   label, product            - for any feature
#   gene, locus_tag           - only when primary_tag is 'gene'
#
# Args    : $sf - feature object providing display_name(), primary_tag(),
#                 has_tag() and get_tag_values()
# Returns : the derived name, or the (false) display_name value when no
#           tag applies
sub generate_feature_name {
    my ($self, $sf) = @_;

    # An existing display_name always wins.
    my $name = $sf->display_name;
    return $name if $name;

    # Candidate tags in priority order; the flag marks tags that are only
    # consulted for features whose primary_tag is 'gene'.
    my @candidates = (
        [ label     => 0 ],
        [ product   => 0 ],
        [ gene      => 1 ],
        [ locus_tag => 1 ],
    );

    for my $candidate (@candidates) {
        my ($tag, $gene_only) = @{$candidate};
        next if $gene_only && $sf->primary_tag ne 'gene';
        next unless $sf->has_tag($tag);

        # Only the first value of the tag is used.
        ($name) = $sf->get_tag_values($tag);
        return $name;
    }

    # Nothing matched: $name still holds the unset display_name, so there
    # is no need to query the feature for it a second time.
    return $name;
}