maint: restructure to use Dist::Zilla
[bioperl-live.git] / lib / Bio / LiveSeq / Gene.pm
blob99f20fb562a81e0d4f727bce1cf959741470aa7f
2 # bioperl module for Bio::LiveSeq::Gene
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net>
8 # Copyright Joseph Insana
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::LiveSeq::Gene - Range abstract class for LiveSeq
18 =head1 SYNOPSIS
20 # documentation needed
22 =head1 DESCRIPTION
24 This is used as storage for all object references concerning a particular gene.
26 =head1 AUTHOR - Joseph A.L. Insana
28 Email: Insana@ebi.ac.uk, jinsana@gmx.net
30 =head1 APPENDIX
32 The rest of the documentation details each of the object
33 methods. Internal methods are usually preceded with a _
35 =cut
37 # Let the code begin...
39 package Bio::LiveSeq::Gene;
40 use strict;
41 use Carp;
42 use Bio::LiveSeq::Prim_Transcript; # needed to create maxtranscript obj
44 =head2 new
46 Title : new
47 Usage : $gene = Bio::LiveSeq::Gene->new(-name => "name",
48 -features => $hashref
49 -upbound => $min
50 -downbound => $max);
52 Function: generates a new Bio::LiveSeq::Gene
53 Returns : reference to a new object of class Gene
54 Errorcode -1
55 Args : one string and one hashreference containing all features defined
56 for the Gene and the references to the LiveSeq objects for those
57 features.
58 Two labels for defining boundaries of the gene. Usually the
59 boundaries will reflect max span of transcript, exon... features,
60 while the DNA sequence will be created with some flanking regions
61 (e.g. with the EMBL_SRS::gene2liveseq routine).
62 If these two labels are not given, they will default to the start
63 and end of the DNA object.
64 Note : the format of the hash has to be like
65 DNA => reference to LiveSeq::DNA object
66 Transcripts => reference to array of transcripts objrefs
67 Transclations => reference to array of transcripts objrefs
68 Exons => ....
69 Introns => ....
70 Prim_Transcripts => ....
71 Repeat_Units => ....
72 Repeat_Regions => ....
73 Only DNA and Transcripts are mandatory
75 =cut
77 sub new {
78 my ($thing, %args) = @_;
79 my $class = ref($thing) || $thing;
80 my ($i,$self,%gene);
82 my ($name,$inputfeatures,$upbound,$downbound)=($args{-name},$args{-features},$args{-upbound},$args{-downbound});
84 unless (ref($inputfeatures) eq "HASH") {
85 carp "$class not initialised because features hash not given";
86 return (-1);
89 my %features=%{$inputfeatures}; # this is done to make our own hash&ref, not
90 my $features=\%features; # the ones input'ed, that could get destroyed
92 my $DNA=$features->{'DNA'};
93 unless (ref($DNA) eq "Bio::LiveSeq::DNA") {
94 carp "$class not initialised because DNA feature not found";
95 return (-1);
98 my ($minstart,$maxend);# used to calculate Gene->maxtranscript from Exon, Transcript (CDS) and Prim_Transcript features
100 my ($start,$end);
102 my @Transcripts=@{$features->{'Transcripts'}};
104 my $strand;
105 unless (ref($Transcripts[0]) eq "Bio::LiveSeq::Transcript") {
106 $self->warn("$class not initialised: first Transcript not a LiveSeq object");
107 return (-1);
108 } else {
109 $strand=$Transcripts[0]->strand; # for maxtranscript consistency check
112 for $i (@Transcripts) {
113 ($start,$end)=($i->start,$i->end);
114 unless ((ref($i) eq "Bio::LiveSeq::Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) {
115 $self->warn("$class not initialised because of problems in Transcripts feature");
116 return (-1);
117 } else {
119 unless($minstart) { $minstart=$start; } # initialize
120 unless($maxend) { $maxend=$end; } # initialize
121 if ($i->strand != $strand) {
122 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
123 return (-1);
125 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
126 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
128 my @Translations; my @Introns; my @Repeat_Units; my @Repeat_Regions;
129 my @Prim_Transcripts; my @Exons;
130 if (defined($features->{'Translations'})) {
131 @Translations=@{$features->{'Translations'}}; }
132 if (defined($features->{'Exons'})) {
133 @Exons=@{$features->{'Exons'}}; }
134 if (defined($features->{'Introns'})) {
135 @Introns=@{$features->{'Introns'}}; }
136 if (defined($features->{'Repeat_Units'})) {
137 @Repeat_Units=@{$features->{'Repeat_Units'}}; }
138 if (defined($features->{'Repeat_Regions'})) {
139 @Repeat_Regions=@{$features->{'Repeat_Regions'}}; }
140 if (defined($features->{'Prim_Transcripts'})) {
141 @Prim_Transcripts=@{$features->{'Prim_Transcripts'}}; }
144 if (@Translations) {
145 for $i (@Translations) {
146 ($start,$end)=($i->start,$i->end);
147 unless ((ref($i) eq "Bio::LiveSeq::Translation")&&($DNA->valid($start))&&($DNA->valid($end))) {
148 $self->warn("$class not initialised because of problems in Translations feature");
149 return (-1);
153 if (@Exons) {
154 for $i (@Exons) {
155 ($start,$end)=($i->start,$i->end);
156 unless ((ref($i) eq "Bio::LiveSeq::Exon")&&($DNA->valid($start))&&($DNA->valid($end))) {
157 $self->warn("$class not initialised because of problems in Exons feature");
158 return (-1);
160 if ($i->strand != $strand) {
161 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
162 return (-1);
164 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
165 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
168 if (@Introns) {
169 for $i (@Introns) {
170 ($start,$end)=($i->start,$i->end);
171 unless ((ref($i) eq "Bio::LiveSeq::Intron")&&($DNA->valid($start))&&($DNA->valid($end))) {
172 $self->warn("$class not initialised because of problems in Introns feature");
173 return (-1);
177 if (@Repeat_Units) {
178 for $i (@Repeat_Units) {
179 ($start,$end)=($i->start,$i->end);
180 unless ((ref($i) eq "Bio::LiveSeq::Repeat_Unit")&&($DNA->valid($start))&&($DNA->valid($end))) {
181 $self->warn("$class not initialised because of problems in Repeat_Units feature");
182 return (-1);
186 if (@Repeat_Regions) {
187 for $i (@Repeat_Regions) {
188 ($start,$end)=($i->start,$i->end);
189 unless ((ref($i) eq "Bio::LiveSeq::Repeat_Region")&&($DNA->valid($start))&&($DNA->valid($end))) {
190 $self->warn("$class not initialised because of problems in Repeat_Regions feature");
191 return (-1);
195 if (@Prim_Transcripts) {
196 for $i (@Prim_Transcripts) {
197 ($start,$end)=($i->start,$i->end);
198 unless ((ref($i) eq "Bio::LiveSeq::Prim_Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) {
199 $self->warn("$class not initialised because of problems in Prim_Transcripts feature");
200 return (-1);
202 if ($i->strand != $strand) {
203 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
204 return (-1);
206 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
207 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
211 # create an array containing all obj references for all Gene Features
212 # useful for _set_Gene_in_all
213 my @allfeatures;
214 push (@allfeatures,$DNA,@Transcripts,@Translations,@Exons,@Introns,@Repeat_Units,@Repeat_Regions,@Prim_Transcripts);
216 # create hash holding numbers for Gene Features
217 my %multiplicity;
218 my $key; my @array;
219 foreach $key (keys(%features)) {
220 unless ($key eq "DNA") {
221 @array=@{$features{$key}};
222 $multiplicity{$key}=scalar(@array);
225 $multiplicity{DNA}=1;
227 # create maxtranscript object. It's a Prim_Transcript with start as the
228 # minimum start and end as the maximum end.
229 # usually these start and end will be the same as the gene->upbound and
230 # gene->downbound, but maybe there could be cases when this will be false
231 # (e.g. with repeat_units just before the prim_transcript or first exon,
232 # but still labelled with the same /gene qualifier)
234 my $maxtranscript=Bio::LiveSeq::Prim_Transcript->new(-start => $minstart, -end => $maxend, -strand => $strand, -seq => $DNA);
237 # check the upbound downbound parameters
238 if (defined($upbound)) {
239 unless ($DNA->valid($upbound)) {
240 $self->warn("$class not initialised because upbound label not valid");
241 return (-1);
243 } else {
244 $upbound=$DNA->start;
246 if (defined($downbound)) {
247 unless ($DNA->valid($downbound)) {
248 $self->warn("$class not initialised because downbound label not valid");
249 return (-1);
251 } else {
252 $downbound=$DNA->end;
255 %gene = (name => $name, features => $features,multiplicity => \%multiplicity,
256 upbound => $upbound, downbound => $downbound, allfeatures => \@allfeatures, maxtranscript => $maxtranscript);
257 $self = \%gene;
258 $self = bless $self, $class;
259 _set_Gene_in_all($self,@allfeatures);
260 return $self;
263 # this sets the "gene" objref in all the objects "belonging" to the Gene,
264 # i.e. in all its Features.
265 sub _set_Gene_in_all {
266 my $Gene=shift;
267 my $self;
268 foreach $self (@_) {
269 $self->gene($Gene);
273 # you can get or set the name of the gene
274 sub name {
275 my ($self,$value) = @_;
276 if (defined $value) {
277 $self->{'name'} = $value;
279 unless (exists $self->{'name'}) {
280 return "unknown";
281 } else {
282 return $self->{'name'};
286 # gets the features hash
287 sub features {
288 my $self=shift;
289 return ($self->{'features'});
291 sub get_DNA {
292 my $self=shift;
293 return ($self->{'features'}->{'DNA'});
295 sub get_Transcripts {
296 my $self=shift;
297 return ($self->{'features'}->{'Transcripts'});
299 sub get_Translations {
300 my $self=shift;
301 return ($self->{'features'}->{'Translations'});
303 sub get_Prim_Transcripts {
304 my $self=shift;
305 return ($self->{'features'}->{'Prim_Transcripts'});
307 sub get_Repeat_Units {
308 my $self=shift;
309 return ($self->{'features'}->{'Repeat_Units'});
311 sub get_Repeat_Regions {
312 my $self=shift;
313 return ($self->{'features'}->{'Repeat_Regions'});
315 sub get_Introns {
316 my $self=shift;
317 return ($self->{'features'}->{'Introns'});
319 sub get_Exons {
320 my $self=shift;
321 return ($self->{'features'}->{'Exons'});
323 sub featuresnum {
324 my $self=shift;
325 return ($self->{'multiplicity'});
327 sub upbound {
328 my $self=shift;
329 return ($self->{'upbound'});
331 sub downbound {
332 my $self=shift;
333 return ($self->{'downbound'});
335 sub printfeaturesnum {
336 my $self=shift;
337 my ($key,$value);
338 my %hash=%{$self->featuresnum};
339 foreach $key (keys(%hash)) {
340 $value=$hash{$key};
341 print "\t$key => $value\n";
344 sub maxtranscript {
345 my $self=shift;
346 return ($self->{'maxtranscript'});
349 sub delete_Obj {
350 my $self = shift;
351 my @values= values %{$self};
352 my @keys= keys %{$self};
354 foreach my $key ( @keys ) {
355 delete $self->{$key};
357 foreach my $value ( @values ) {
358 if (index(ref($value),"LiveSeq") != -1) { # object case
359 eval {
360 # delete $self->{$value};
361 $value->delete_Obj;
363 } elsif (index(ref($value),"ARRAY") != -1) { # array case
364 my @array=@{$value};
365 my $element;
366 foreach $element (@array) {
367 eval {
368 $element->delete_Obj;
371 } elsif (index(ref($value),"HASH") != -1) { # object case
372 my %hash=%{$value};
373 my $element;
374 foreach $element (%hash) {
375 eval {
376 $element->delete_Obj;
381 return(1);
385 =head2 verbose
387 Title : verbose
388 Usage : $self->verbose(0)
389 Function: Sets verbose level for how ->warn behaves
390 -1 = silent: no warning
391 0 = reduced: minimal warnings
392 1 = default: all warnings
393 2 = extended: all warnings + stack trace dump
394 3 = paranoid: a warning becomes a throw and the program dies
396 Note: a quick way to set all LiveSeq objects at the same verbosity
397 level is to change the DNA level object, since they all look to
398 that one if their verbosity_level attribute is not set.
399 But the method offers fine tuning possibility by changing the
400 verbose level of each object in a different way.
402 So for example, after $loader= and $gene= have been retrieved
403 by a program, the command $gene->verbose(0); would
404 set the default verbosity level to 0 for all objects.
406 Returns : the current verbosity level
407 Args : -1,0,1,2 or 3
409 =cut
412 sub verbose {
413 my $self=shift;
414 my $value = shift;
415 return $self->{'features'}->{'DNA'}->verbose($value);
418 sub warn {
419 my $self=shift;
420 my $value = shift;
421 return $self->{'features'}->{'DNA'}->warn($value);