* sync with trunk
[bioperl-live.git] / Bio / LiveSeq / Gene.pm
blob55d41b36f6a8a056b256bd6d37b16da91a1cfead
1 # $Id$
3 # bioperl module for Bio::LiveSeq::Gene
5 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
7 # Copyright Joseph Insana
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::LiveSeq::Gene - Range abstract class for LiveSeq
17 =head1 SYNOPSIS
19 # documentation needed
21 =head1 DESCRIPTION
23 This is used as storage for all object references concerning a particular gene.
25 =head1 AUTHOR - Joseph A.L. Insana
27 Email: Insana@ebi.ac.uk, jinsana@gmx.net
29 =head1 APPENDIX
31 The rest of the documentation details each of the object
32 methods. Internal methods are usually preceded with a _
34 =cut
36 # Let the code begin...
38 package Bio::LiveSeq::Gene;
39 use strict;
40 use Carp;
41 use Bio::LiveSeq::Prim_Transcript; # needed to create maxtranscript obj
43 =head2 new
45 Title : new
46 Usage : $gene = Bio::LiveSeq::Gene->new(-name => "name",
47 -features => $hashref
48 -upbound => $min
49 -downbound => $max);
51 Function: generates a new Bio::LiveSeq::Gene
52 Returns : reference to a new object of class Gene
53 Errorcode -1
54 Args : one string and one hashreference containing all features defined
55 for the Gene and the references to the LiveSeq objects for those
56 features.
57 Two labels for defining boundaries of the gene. Usually the
58 boundaries will reflect max span of transcript, exon... features,
59 while the DNA sequence will be created with some flanking regions
60 (e.g. with the EMBL_SRS::gene2liveseq routine).
61 If these two labels are not given, they will default to the start
62 and end of the DNA object.
63 Note : the format of the hash has to be like
64 DNA => reference to LiveSeq::DNA object
65 Transcripts => reference to array of transcripts objrefs
66 Transclations => reference to array of transcripts objrefs
67 Exons => ....
68 Introns => ....
69 Prim_Transcripts => ....
70 Repeat_Units => ....
71 Repeat_Regions => ....
72 Only DNA and Transcripts are mandatory
74 =cut
76 sub new {
77 my ($thing, %args) = @_;
78 my $class = ref($thing) || $thing;
79 my ($i,$self,%gene);
81 my ($name,$inputfeatures,$upbound,$downbound)=($args{-name},$args{-features},$args{-upbound},$args{-downbound});
83 unless (ref($inputfeatures) eq "HASH") {
84 carp "$class not initialised because features hash not given";
85 return (-1);
88 my %features=%{$inputfeatures}; # this is done to make our own hash&ref, not
89 my $features=\%features; # the ones input'ed, that could get destroyed
91 my $DNA=$features->{'DNA'};
92 unless (ref($DNA) eq "Bio::LiveSeq::DNA") {
93 carp "$class not initialised because DNA feature not found";
94 return (-1);
97 my ($minstart,$maxend);# used to calculate Gene->maxtranscript from Exon, Transcript (CDS) and Prim_Transcript features
99 my ($start,$end);
101 my @Transcripts=@{$features->{'Transcripts'}};
103 my $strand;
104 unless (ref($Transcripts[0]) eq "Bio::LiveSeq::Transcript") {
105 $self->warn("$class not initialised: first Transcript not a LiveSeq object");
106 return (-1);
107 } else {
108 $strand=$Transcripts[0]->strand; # for maxtranscript consistency check
111 for $i (@Transcripts) {
112 ($start,$end)=($i->start,$i->end);
113 unless ((ref($i) eq "Bio::LiveSeq::Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) {
114 $self->warn("$class not initialised because of problems in Transcripts feature");
115 return (-1);
116 } else {
118 unless($minstart) { $minstart=$start; } # initialize
119 unless($maxend) { $maxend=$end; } # initialize
120 if ($i->strand != $strand) {
121 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
122 return (-1);
124 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
125 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
127 my @Translations; my @Introns; my @Repeat_Units; my @Repeat_Regions;
128 my @Prim_Transcripts; my @Exons;
129 if (defined($features->{'Translations'})) {
130 @Translations=@{$features->{'Translations'}}; }
131 if (defined($features->{'Exons'})) {
132 @Exons=@{$features->{'Exons'}}; }
133 if (defined($features->{'Introns'})) {
134 @Introns=@{$features->{'Introns'}}; }
135 if (defined($features->{'Repeat_Units'})) {
136 @Repeat_Units=@{$features->{'Repeat_Units'}}; }
137 if (defined($features->{'Repeat_Regions'})) {
138 @Repeat_Regions=@{$features->{'Repeat_Regions'}}; }
139 if (defined($features->{'Prim_Transcripts'})) {
140 @Prim_Transcripts=@{$features->{'Prim_Transcripts'}}; }
143 if (@Translations) {
144 for $i (@Translations) {
145 ($start,$end)=($i->start,$i->end);
146 unless ((ref($i) eq "Bio::LiveSeq::Translation")&&($DNA->valid($start))&&($DNA->valid($end))) {
147 $self->warn("$class not initialised because of problems in Translations feature");
148 return (-1);
152 if (@Exons) {
153 for $i (@Exons) {
154 ($start,$end)=($i->start,$i->end);
155 unless ((ref($i) eq "Bio::LiveSeq::Exon")&&($DNA->valid($start))&&($DNA->valid($end))) {
156 $self->warn("$class not initialised because of problems in Exons feature");
157 return (-1);
159 if ($i->strand != $strand) {
160 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
161 return (-1);
163 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
164 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
167 if (@Introns) {
168 for $i (@Introns) {
169 ($start,$end)=($i->start,$i->end);
170 unless ((ref($i) eq "Bio::LiveSeq::Intron")&&($DNA->valid($start))&&($DNA->valid($end))) {
171 $self->warn("$class not initialised because of problems in Introns feature");
172 return (-1);
176 if (@Repeat_Units) {
177 for $i (@Repeat_Units) {
178 ($start,$end)=($i->start,$i->end);
179 unless ((ref($i) eq "Bio::LiveSeq::Repeat_Unit")&&($DNA->valid($start))&&($DNA->valid($end))) {
180 $self->warn("$class not initialised because of problems in Repeat_Units feature");
181 return (-1);
185 if (@Repeat_Regions) {
186 for $i (@Repeat_Regions) {
187 ($start,$end)=($i->start,$i->end);
188 unless ((ref($i) eq "Bio::LiveSeq::Repeat_Region")&&($DNA->valid($start))&&($DNA->valid($end))) {
189 $self->warn("$class not initialised because of problems in Repeat_Regions feature");
190 return (-1);
194 if (@Prim_Transcripts) {
195 for $i (@Prim_Transcripts) {
196 ($start,$end)=($i->start,$i->end);
197 unless ((ref($i) eq "Bio::LiveSeq::Prim_Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) {
198 $self->warn("$class not initialised because of problems in Prim_Transcripts feature");
199 return (-1);
201 if ($i->strand != $strand) {
202 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
203 return (-1);
205 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
206 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
210 # create an array containing all obj references for all Gene Features
211 # useful for _set_Gene_in_all
212 my @allfeatures;
213 push (@allfeatures,$DNA,@Transcripts,@Translations,@Exons,@Introns,@Repeat_Units,@Repeat_Regions,@Prim_Transcripts);
215 # create hash holding numbers for Gene Features
216 my %multiplicity;
217 my $key; my @array;
218 foreach $key (keys(%features)) {
219 unless ($key eq "DNA") {
220 @array=@{$features{$key}};
221 $multiplicity{$key}=scalar(@array);
224 $multiplicity{DNA}=1;
226 # create maxtranscript object. It's a Prim_Transcript with start as the
227 # minimum start and end as the maximum end.
228 # usually these start and end will be the same as the gene->upbound and
229 # gene->downbound, but maybe there could be cases when this will be false
230 # (e.g. with repeat_units just before the prim_transcript or first exon,
231 # but still labelled with the same /gene qualifier)
233 my $maxtranscript=Bio::LiveSeq::Prim_Transcript->new(-start => $minstart, -end => $maxend, -strand => $strand, -seq => $DNA);
236 # check the upbound downbound parameters
237 if (defined($upbound)) {
238 unless ($DNA->valid($upbound)) {
239 $self->warn("$class not initialised because upbound label not valid");
240 return (-1);
242 } else {
243 $upbound=$DNA->start;
245 if (defined($downbound)) {
246 unless ($DNA->valid($downbound)) {
247 $self->warn("$class not initialised because downbound label not valid");
248 return (-1);
250 } else {
251 $downbound=$DNA->end;
254 %gene = (name => $name, features => $features,multiplicity => \%multiplicity,
255 upbound => $upbound, downbound => $downbound, allfeatures => \@allfeatures, maxtranscript => $maxtranscript);
256 $self = \%gene;
257 $self = bless $self, $class;
258 _set_Gene_in_all($self,@allfeatures);
259 return $self;
262 # this sets the "gene" objref in all the objects "belonging" to the Gene,
263 # i.e. in all its Features.
264 sub _set_Gene_in_all {
265 my $Gene=shift;
266 my $self;
267 foreach $self (@_) {
268 $self->gene($Gene);
272 # you can get or set the name of the gene
273 sub name {
274 my ($self,$value) = @_;
275 if (defined $value) {
276 $self->{'name'} = $value;
278 unless (exists $self->{'name'}) {
279 return "unknown";
280 } else {
281 return $self->{'name'};
285 # gets the features hash
286 sub features {
287 my $self=shift;
288 return ($self->{'features'});
290 sub get_DNA {
291 my $self=shift;
292 return ($self->{'features'}->{'DNA'});
294 sub get_Transcripts {
295 my $self=shift;
296 return ($self->{'features'}->{'Transcripts'});
298 sub get_Translations {
299 my $self=shift;
300 return ($self->{'features'}->{'Translations'});
302 sub get_Prim_Transcripts {
303 my $self=shift;
304 return ($self->{'features'}->{'Prim_Transcripts'});
306 sub get_Repeat_Units {
307 my $self=shift;
308 return ($self->{'features'}->{'Repeat_Units'});
310 sub get_Repeat_Regions {
311 my $self=shift;
312 return ($self->{'features'}->{'Repeat_Regions'});
314 sub get_Introns {
315 my $self=shift;
316 return ($self->{'features'}->{'Introns'});
318 sub get_Exons {
319 my $self=shift;
320 return ($self->{'features'}->{'Exons'});
322 sub featuresnum {
323 my $self=shift;
324 return ($self->{'multiplicity'});
326 sub upbound {
327 my $self=shift;
328 return ($self->{'upbound'});
330 sub downbound {
331 my $self=shift;
332 return ($self->{'downbound'});
334 sub printfeaturesnum {
335 my $self=shift;
336 my ($key,$value);
337 my %hash=%{$self->featuresnum};
338 foreach $key (keys(%hash)) {
339 $value=$hash{$key};
340 print "\t$key => $value\n";
343 sub maxtranscript {
344 my $self=shift;
345 return ($self->{'maxtranscript'});
348 sub delete_Obj {
349 my $self = shift;
350 my @values= values %{$self};
351 my @keys= keys %{$self};
353 foreach my $key ( @keys ) {
354 delete $self->{$key};
356 foreach my $value ( @values ) {
357 if (index(ref($value),"LiveSeq") != -1) { # object case
358 eval {
359 # delete $self->{$value};
360 $value->delete_Obj;
362 } elsif (index(ref($value),"ARRAY") != -1) { # array case
363 my @array=@{$value};
364 my $element;
365 foreach $element (@array) {
366 eval {
367 $element->delete_Obj;
370 } elsif (index(ref($value),"HASH") != -1) { # object case
371 my %hash=%{$value};
372 my $element;
373 foreach $element (%hash) {
374 eval {
375 $element->delete_Obj;
380 return(1);
384 =head2 verbose
386 Title : verbose
387 Usage : $self->verbose(0)
388 Function: Sets verbose level for how ->warn behaves
389 -1 = silent: no warning
390 0 = reduced: minimal warnings
391 1 = default: all warnings
392 2 = extended: all warnings + stack trace dump
393 3 = paranoid: a warning becomes a throw and the program dies
395 Note: a quick way to set all LiveSeq objects at the same verbosity
396 level is to change the DNA level object, since they all look to
397 that one if their verbosity_level attribute is not set.
398 But the method offers fine tuning possibility by changing the
399 verbose level of each object in a different way.
401 So for example, after $loader= and $gene= have been retrieved
402 by a program, the command $gene->verbose(0); would
403 set the default verbosity level to 0 for all objects.
405 Returns : the current verbosity level
406 Args : -1,0,1,2 or 3
408 =cut
411 sub verbose {
412 my $self=shift;
413 my $value = shift;
414 return $self->{'features'}->{'DNA'}->verbose($value);
417 sub warn {
418 my $self=shift;
419 my $value = shift;
420 return $self->{'features'}->{'DNA'}->warn($value);