* seq_inds is not defined for Model-based HSPs
[bioperl-live.git] / Bio / Cluster / UniGene.pm
blob5143faf67da13e3408f9bf7d9c48e35cd75e15d4
1 # $Id$
3 # BioPerl module for Bio::Cluster::UniGene.pm
5 # Cared for by Andrew Macgregor <andrew at cbbc.murdoch.edu.au>
7 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
8 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
9 # http://meg.otago.ac.nz/
11 # You may distribute this module under the same terms as perl itself
13 # _history
14 # April 17, 2002 - Initial implementation by Andrew Macgregor
15 # POD documentation - main docs before the code
17 =head1 NAME
19 Bio::Cluster::UniGene - UniGene object
21 =head1 SYNOPSIS
23 use Bio::Cluster::UniGene;
24 use Bio::ClusterIO;
26 $stream = Bio::ClusterIO->new('-file' => "Hs.data",
27 '-format' => "unigene");
28 # note: we quote -format to keep older perl's from complaining.
30 while ( my $in = $stream->next_cluster() ) {
31 print $in->unigene_id() . "\n";
32 while ( my $sequence = $in->next_seq() ) {
33 print $sequence->accession_number() . "\n";
37 =head1 DESCRIPTION
39 This UniGene object implements the L<Bio::Cluster::UniGeneI> interface
40 for the representation if UniGene clusters in Bioperl. It is returned
41 by the L<Bio::ClusterIO> parser for unigene format and contains all
42 the data associated with one UniGene record.
44 This class implements several interfaces and hence can be used
45 wherever instances of such interfaces are expected. In particular, the
46 interfaces are L<Bio::ClusterI> as the base interface for all cluster
47 representations, and in addition L<Bio::IdentifiableI> and
48 L<Bio::DescribableI>.
50 The following lists the UniGene specific methods that are available
51 (see below for details). Be aware next_XXX iterators take a snapshot
52 of the array property when they are first called, and this snapshot is
53 not reset until the iterator is exhausted. Hence, once called you need
54 to exhaust the iterator to see any changes that have been made to the
55 property in the meantime. You will usually want to use the
56 non-iterator equivalents and loop over the elements yourself.
58 new() - standard new call
60 unigene_id() - set/get unigene_id
62 title() - set/get title (description)
64 gene() - set/get gene
66 cytoband() - set/get cytoband
68 mgi() - set/get mgi
70 locuslink() - set/get locuslink
72 homol() - set/get homologene
74 gnm_terminus() - set/get gnm_terminus
76 scount() - set/get scount
78 express() - set/get express, currently takes/returns a reference to an
79 array of expressed tissues
81 next_express() - returns the next tissue expression from the expressed
82 tissue array
84 chromosome() - set/get chromosome, currently takes/returns a reference
85 to an array of chromosome lines
87 next_chromosome() - returns the next chromosome line from the array of
88 chromosome lines
90 sts() - set/get sts, currently takes/returns a reference to an array
91 of sts lines
93 next_sts() - returns the next sts line from the array of sts lines
95 txmap() - set/get txmap, currently takes/returns a reference to an
96 array of txmap lines
98 next_txmap() - returns the next txmap line from the array of txmap
99 lines
101 protsim() - set/get protsim, currently takes/returns a reference to an
102 array of protsim lines
104 next_protsim() - returns the next protsim line from the array of
105 protsim lines
107 sequences() - set/get sequence, currently takes/returns a reference to
108 an array of references to seq info
110 next_seq() - returns a Seq object that currently only contains an
111 accession number
114 =head1 Implemented Interfaces
116 This class implementes the following interfaces.
118 =over 4
120 =item Bio::Cluster::UniGeneI
122 This includes implementing Bio::ClusterI.
124 =item Bio::IdentifiableI
126 =item Bio::DescribableI
128 =item Bio::AnnotatableI
130 =item Bio::Factory::SequenceStreamI
132 =back
134 =head1 FEEDBACK
137 =head2 Mailing Lists
139 User feedback is an integral part of the evolution of this and other
140 Bioperl modules. Send your comments and suggestions preferably to one
141 of the Bioperl mailing lists. Your participation is much appreciated.
143 bioperl-l@bioperl.org - General discussion
144 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
146 =head2 Reporting Bugs
148 Report bugs to the Bioperl bug tracking system to help us keep track
149 the bugs and their resolution. Bug reports can be submitted via the
150 web:
152 http://bugzilla.open-bio.org/
154 =head1 AUTHOR - Andrew Macgregor
156 Email andrew at cbbc.murdoch.edu.au
158 =head1 CONTRIBUTORS
160 Hilmar Lapp, hlapp at gmx.net
162 =head1 APPENDIX
165 The rest of the documentation details each of the object
166 methods. Internal methods are usually preceded with a "_".
168 =cut
170 # Let the code begin...
173 package Bio::Cluster::UniGene;
174 use strict;
176 use Bio::Annotation::Collection;
177 use Bio::Annotation::DBLink;
178 use Bio::Annotation::SimpleValue;
179 use Bio::Species;
180 use Bio::Seq::SeqFactory;
182 use base qw(Bio::Root::Root Bio::Cluster::UniGeneI Bio::IdentifiableI Bio::DescribableI Bio::AnnotatableI Bio::Factory::SequenceStreamI);
184 my %species_map = (
185 'Aga' => "Anopheles gambiae",
186 'Ame' => "Apis mellifera",
187 'At' => "Arabidopsis thaliana",
188 'Bmo' => "Bombyx mori",
189 'Bt' => "Bos taurus",
190 'Cel' => "Caenorhabditis elegans",
191 'Cfa' => "Canine familiaris",
192 'Cin' => "Ciona intestinalis",
193 'Cre' => "Chlamydomonas reinhardtii",
194 'Csa' => "Ciona savignyi",
195 'Csi' => "Citrus sinensis",
196 'Ddi' => "Dictyostelium discoideum",
197 'Dr' => "Danio rerio",
198 'Dm' => "Drosophila melanogaster",
199 'Gga' => "Gallus gallus",
200 'Gma' => "Glycine max",
201 'Han' => "Helianthus annus",
202 'Hs' => "Homo sapiens",
203 'Hma' => "Hydra magnipapillata",
204 'Hv' => "Hordeum vulgare",
205 'Lco' => "Lotus corniculatus",
206 'Les' => "Lycopersicon esculentum",
207 'Lsa' => "Lactuca sativa",
208 'Mdo' => "Malus x domestica",
209 'Mgr' => "Magnaporthe grisea",
210 'Mm' => "Mus musculus",
211 'Mtr' => "Medicago truncatula",
212 'Ncr' => "Neurospora crassa",
213 'Oar' => "Ovis aries",
214 'Omy' => "Oncorhynchus mykiss",
215 'Os' => "Oryza sativa",
216 'Ola' => "Oryzias latipes",
217 'Ppa' => "Physcomitrella patens",
218 'Pta' => "Pinus taeda",
219 'Ptp' => "Populus tremula x Populus tremuloides",
220 'Rn' => "Rattus norvegicus",
221 'Sbi' => "Sorghum bicolor",
222 'Sma' => "Schistosoma mansoni",
223 'Sof' => "Saccharum officinarum",
224 'Spu' => "Strongylocentrotus purpuratus",
225 'Ssa' => "Salmo salar",
226 'Ssc' => "Sus scrofa",
227 'Str' => "Xenopus tropicalis",
228 'Stu' => "Solanum tuberosum",
229 'Ta' => "Triticum aestivum",
230 'Tgo' => "Toxoplasma gondii",
231 'Tru' => "Takifugu rubripes",
232 'Vvi' => "Vitis vinifera",
233 'Xl' => "Xenopus laevis",
234 'Zm' => "Zea mays",
238 =head2 new
240 Title : new
241 Usage : used by ClusterIO
242 Returns : a new Bio::Cluster::Unigene object
244 =cut
246 sub new {
247 # standard new call..
248 my($caller,@args) = @_;
249 my $self = $caller->SUPER::new(@args);
251 my ($ugid,$desc,$mems,$size,$species,$dispid,$id,$ns,$auth,$v,$seqfact) =
252 $self->_rearrange([qw(UNIGENE_ID
253 DESCRIPTION
254 MEMBERS
255 SIZE
256 SPECIES
257 DISPLAY_ID
258 OBJECT_ID
259 NAMESPACE
260 AUTHORITY
261 VERSION
262 SEQFACTORY
263 )], @args);
265 $self->{'_alphabet'} = 'dna';
267 $self->unigene_id($ugid) if $ugid;
268 $self->description($desc) if $desc;
269 $self->sequences($mems) if $mems;
270 $self->size($size) if defined($size);
271 $self->display_id($dispid) if $dispid; # overwrites ugid
272 $self->object_id($id) if $id; # overwrites dispid
273 $self->namespace($ns || 'UniGene');
274 $self->authority($auth || 'NCBI');
275 $self->version($v) if defined($v);
276 if( ! defined $seqfact ) {
277 $seqfact = Bio::Seq::SeqFactory->new
278 (-verbose => $self->verbose(),
279 -type => 'Bio::Seq::RichSeq');
281 $self->sequence_factory($seqfact);
282 if( (! $species) && (defined $self->unigene_id() &&
283 $self->unigene_id() =~ /^([A-Za-z]+)\.[0-9]/)) {
284 # try set a default one depending on the ID
285 $species = $species_map{$1};
287 $self->species($species);
288 return $self;
292 =head1 L<Bio::Cluster::UniGeneI> methods
294 =cut
296 =head2 unigene_id
298 Title : unigene_id
299 Usage : unigene_id();
300 Function: Returns the unigene_id associated with the object.
301 Example : $id = $unigene->unigene_id or $unigene->unigene_id($id)
302 Returns : A string
303 Args : None or an id
306 =cut
308 sub unigene_id {
309 my ($obj,$value) = @_;
310 if( defined $value) {
311 $obj->{'unigene_id'} = $value;
313 return $obj->{'unigene_id'};
318 =head2 title
320 Title : title
321 Usage : title();
322 Function: Returns the title associated with the object.
323 Example : $title = $unigene->title or $unigene->title($title)
324 Returns : A string
325 Args : None or a title
328 =cut
330 sub title {
331 my ($obj,$value) = @_;
332 if( defined $value) {
333 $obj->{'title'} = $value;
335 return $obj->{'title'};
339 =head2 gene
341 Title : gene
342 Usage : gene();
343 Function: Returns the gene associated with the object.
344 Example : $gene = $unigene->gene or $unigene->gene($gene)
345 Returns : A string
346 Args : None or a gene
349 =cut
351 sub gene {
352 my $self = shift;
353 return $self->_annotation_value('gene_name', @_);
357 =head2 cytoband
359 Title : cytoband
360 Usage : cytoband();
361 Function: Returns the cytoband associated with the object.
362 Example : $cytoband = $unigene->cytoband or $unigene->cytoband($cytoband)
363 Returns : A string
364 Args : None or a cytoband
367 =cut
369 sub cytoband {
370 my $self = shift;
371 return $self->_annotation_value('cyto_band', @_);
374 =head2 mgi
376 Title : mgi
377 Usage : mgi();
378 Function: Returns the mgi associated with the object.
379 Example : $mgi = $unigene->mgi or $unigene->mgi($mgi)
380 Returns : A string
381 Args : None or a mgi
384 =cut
386 sub mgi {
387 my $self = shift;
388 my $acc;
390 if(@_) {
391 # purge first
392 $self->_remove_dblink('dblink','MGI');
393 # then add if a valid value is present
394 if($acc = shift) {
395 $self->_annotation_dblink('dblink','MGI',$acc);
397 } else {
398 ($acc) = $self->_annotation_dblink('dblink','MGI');
400 return $acc;
404 =head2 locuslink
406 Title : locuslink
407 Usage : locuslink();
408 Function: Returns or stores a reference to an array containing locuslink data.
409 Returns : An array reference
410 Args : None or an array reference
412 =cut
414 sub locuslink {
415 my ($self,$ll) = @_;
417 if($ll) {
418 # purge first
419 $self->_remove_dblink('dblink','LocusLink');
420 # then add as many accessions as are present
421 foreach my $acc (@$ll) {
422 $self->_annotation_dblink('dblink','LocusLink',$acc);
424 } else {
425 my @accs = $self->_annotation_dblink('dblink','LocusLink');
426 $ll = [@accs];
428 return $ll;
432 =head2 homol
434 Title : homol
435 Usage : homol();
436 Function: Returns the homol entry associated with the object.
437 Example : $homol = $unigene->homol or $unigene->homol($homol)
438 Returns : A string
439 Args : None or a homol entry
441 =cut
443 sub homol {
444 my $self = shift;
445 return $self->_annotation_value('homol', @_);
449 =head2 restr_expr
451 Title : restr_expr
452 Usage : restr_expr();
453 Function: Returns the restr_expr entry associated with the object.
454 Example : $restr_expr = $unigene->restr_expr or $unigene->restr_expr($restr_expr)
455 Returns : A string
456 Args : None or a restr_expr entry
458 =cut
460 sub restr_expr {
461 my $self = shift;
462 return $self->_annotation_value('restr_expr', @_);
466 =head2 gnm_terminus
468 Title : gnm_terminus
469 Usage : gnm_terminus();
470 Function: Returns the gnm_terminus associated with the object.
471 Example : $gnm_terminus = $unigene->gnm_terminus or
472 $unigene->gnm_terminus($gnm_terminus)
473 Returns : A string
474 Args : None or a gnm_terminus
476 =cut
478 sub gnm_terminus {
479 my $self = shift;
480 return $self->_annotation_value('gnm_terminus', @_);
483 =head2 scount
485 Title : scount
486 Usage : scount();
487 Function: Returns the scount associated with the object.
488 Example : $scount = $unigene->scount or $unigene->scount($scount)
489 Returns : A string
490 Args : None or a scount
492 =cut
494 sub scount {
495 my ($obj,$value) = @_;
496 if( defined $value) {
497 $obj->{'scount'} = $value;
498 } elsif((! defined($obj->{'scount'})) && defined($obj->sequences())) {
499 $obj->{'scount'} = $obj->size();
501 return $obj->{'scount'};
505 =head2 express
507 Title : express
508 Usage : express();
509 Function: Returns or stores a reference to an array containing
510 tissue expression data
511 Returns : An array reference
512 Args : None or an array reference
514 =cut
516 sub express {
517 my $self = shift;
519 return $self->_annotation_value_ary('expressed',@_);
523 =head2 chromosome
525 Title : chromosome
526 Usage : chromosome();
527 Function: Returns or stores a reference to an array containing
528 chromosome lines
529 Returns : An array reference
530 Args : None or an array reference
532 =cut
534 sub chromosome {
535 my $self = shift;
537 return $self->_annotation_value_ary('chromosome',@_);
541 =head2 sts
543 Title : sts
544 Usage : sts();
545 Function: Returns or stores a reference to an array containing sts lines
547 Returns : An array reference
548 Args : None or an array reference
550 =cut
552 sub sts {
553 my $self = shift;
555 return $self->_annotation_value_ary('sts',@_);
559 =head2 txmap
561 Title : txmap
562 Usage : txmap();
563 Function: Returns or stores a reference to an array containing txmap lines
565 Returns : An array reference
566 Args : None or an array reference
568 =cut
570 sub txmap {
571 my $self = shift;
573 return $self->_annotation_value_ary('txmap',@_);
577 =head2 protsim
579 Title : protsim
580 Usage : protsim();
581 Function: Returns or stores a reference to an array containing protsim lines
582 This should really only be used by ClusterIO, not directly
583 Returns : An array reference
584 Args : None or an array reference
586 =cut
588 sub protsim {
589 my $self = shift;
591 return $self->_annotation_value_ary('protsim',@_);
595 =head2 sequences
597 Title : sequences
598 Usage : sequences();
599 Function: Returns or stores a reference to an array containing
600 sequence data.
602 This is mostly reserved for ClusterIO parsers. You should
603 use get_members() for get and add_member()/remove_members()
604 for set.
606 Returns : An array reference, or undef
607 Args : None or an array reference or undef
609 =cut
611 sub sequences {
612 my $self = shift;
614 return $self->{'members'} = shift if @_;
615 return $self->{'members'};
618 =head2 species
620 Title : species
621 Usage : $obj->species($newval)
622 Function: Get/set the species object for this Unigene cluster.
623 Example :
624 Returns : value of species (a L<Bio::Species> object)
625 Args : on set, new value (a L<Bio::Species> object or
626 the binomial name, or undef, optional)
629 =cut
631 sub species{
632 my $self = shift;
634 if(@_) {
635 my $species = shift;
636 if($species && (! ref($species))) {
637 my @class = reverse(split(' ',$species));
638 $species = Bio::Species->new(-classification => \@class);
640 return $self->{'species'} = $species;
642 return $self->{'species'};
646 =head1 L<Bio::ClusterI> methods
648 =cut
650 =head2 display_id
652 Title : display_id
653 Usage :
654 Function: Get/set the display name or identifier for the cluster
656 This is aliased to unigene_id().
658 Returns : a string
659 Args : optional, on set the display ID ( a string)
661 =cut
663 sub display_id{
664 return shift->unigene_id(@_);
667 =head2 description
669 Title : description
670 Usage : Bio::ClusterI->description("POLYUBIQUITIN")
671 Function: get/set for the consensus description of the cluster
673 This is aliased to title().
675 Returns : the description string
676 Args : Optional the description string
678 =cut
680 sub description{
681 return shift->title(@_);
684 =head2 size
686 Title : size
687 Usage : Bio::ClusterI->size();
688 Function: get for the size of the family,
689 calculated from the number of members
691 This is aliased to scount().
693 Returns : the size of the cluster
694 Args :
696 =cut
698 sub size {
699 my $self = shift;
701 # hard-wiring the size is allowed if there are no sequences
702 return $self->scount(@_) unless defined($self->sequences());
703 # but we can't change the number of members through this method
704 my $n = scalar(@{$self->sequences()});
705 if(@_ && ($n != $_[0])) {
706 $self->throw("Cannot change cluster size using size() from $n to ".
707 $_[0]);
709 return $n;
712 =head2 cluster_score
714 Title : cluster_score
715 Usage : $cluster ->cluster_score(100);
716 Function: get/set for cluster_score which
717 represent the score in which the clustering
718 algorithm assigns to this cluster.
720 For UniGene clusters, there really is no cluster score that
721 would come with the data. However, we provide an
722 implementation here so that you can score UniGene clusters
723 if you want to.
725 Returns : a number
726 Args : optionally, on set a number
728 =cut
730 sub cluster_score{
731 my $self = shift;
733 return $self->{'cluster_score'} = shift if @_;
734 return $self->{'cluster_score'};
737 =head2 get_members
739 Title : get_members
740 Usage : Bio::ClusterI->get_members(($seq1, $seq2));
741 Function: retrieve the members of the family by some criteria
743 Will return all members if no criteria are provided.
745 At this time this implementation does not support
746 specifying criteria and will always return all members.
748 Returns : the array of members
749 Args :
751 =cut
753 sub get_members {
754 my $self = shift;
756 my $mems = $self->sequences() || [];
757 # already objects?
758 if(@$mems && (ref($mems->[0]) eq "HASH")) {
759 # nope, we need to build the object list from scratch
760 my @memlist = ();
761 while(my $seq = $self->next_seq()) {
762 push(@memlist, $seq);
764 # we cache this array of objects as the new member list
765 $mems = \@memlist;
766 $self->sequences($mems);
768 # done
769 return @$mems;
773 =head1 Annotatable view at the object properties
775 =cut
777 =head2 annotation
779 Title : annotation
780 Usage : $obj->annotation($newval)
781 Function: Get/set the L<Bio::AnnotationCollectionI> object for
782 this UniGene cluster.
784 Many attributes of this class are actually stored within
785 the annotation collection object as L<Bio::AnnotationI>
786 compliant objects, so you can conveniently access them
787 through the same interface as you would e.g. access
788 L<Bio::SeqI> annotation properties.
790 If you call this method in set mode and replace the
791 annotation collection with another one you should know
792 exactly what you are doing.
794 Example :
795 Returns : a L<Bio::AnnotationCollectionI> compliant object
796 Args : on set, new value (a L<Bio::AnnotationCollectionI>
797 compliant object or undef, optional)
800 =cut
802 sub annotation{
803 my $self = shift;
805 if(@_) {
806 return $self->{'annotation'} = shift;
807 } elsif(! exists($self->{'annotation'})) {
808 $self->{'annotation'} = Bio::Annotation::Collection->new();
810 return $self->{'annotation'};
814 =head1 Implementation specific methods
816 These are mostly for adding/removing to array properties, and for
817 methods with special functionality.
819 =cut
821 =head2 add_member
823 Title : add_member
824 Usage :
825 Function: Adds a member object to the list of members.
826 Example :
827 Returns : TRUE if the new member was successfuly added, and FALSE
828 otherwise.
829 Args : The member to add.
832 =cut
834 sub add_member{
835 my ($self,@mems) = @_;
837 my $memlist = $self->{'members'} || [];
838 # this is an object interface; is the member list already objects?
839 if(@$memlist && (ref($memlist->[0]) eq "HASH")) {
840 # nope, convert to objects
841 $memlist = [$self->get_members()];
843 # add new member(s)
844 push(@$memlist, @mems);
845 # store if we created this array ref ourselves
846 $self->sequences($memlist);
847 # done
848 return 1;
851 =head2 remove_members
853 Title : remove_members
854 Usage :
855 Function: Remove the list of members for this cluster such that the
856 member list is undefined afterwards (as opposed to zero members).
857 Example :
858 Returns : the previous list of members
859 Args : none
862 =cut
864 sub remove_members{
865 my $self = shift;
867 my @mems = $self->get_members();
868 $self->sequences(undef);
869 return @mems;
873 =head2 next_locuslink
875 Title : next_locuslink
876 Usage : next_locuslink();
877 Function: Returns the next locuslink from an array referred
878 to using $obj->{'locuslink'}
880 If you call this iterator again after it returned undef, it
881 will re-cycle through the list of elements. Changes in the
882 underlying array property while you loop over this iterator
883 will not be reflected until you exhaust the iterator.
885 Example : while ( my $locuslink = $in->next_locuslink() ) {
886 print "$locuslink\n";
888 Returns : String
889 Args : None
891 =cut
893 sub next_locuslink {
894 my ($obj) = @_;
896 return $obj->_next_element("ll","locuslink");
899 =head2 next_express
901 Title : next_express
902 Usage : next_express();
903 Function: Returns the next tissue from an array referred
904 to using $obj->{'express'}
906 If you call this iterator again after it returned undef, it
907 will re-cycle through the list of elements. Changes in the
908 underlying array property while you loop over this iterator
909 will not be reflected until you exhaust the iterator.
911 Example : while ( my $express = $in->next_express() ) {
912 print "$express\n";
914 Returns : String
915 Args : None
917 =cut
919 sub next_express {
920 my ($obj) = @_;
922 return $obj->_next_element("express","express");
926 =head2 next_chromosome
928 Title : next_chromosome
929 Usage : next_chromosome();
930 Function: Returns the next chromosome line from an array referred
931 to using $obj->{'chromosome'}
933 If you call this iterator again after it returned undef, it
934 will re-cycle through the list of elements. Changes in the
935 underlying array property while you loop over this iterator
936 will not be reflected until you exhaust the iterator.
938 Example : while ( my $chromosome = $in->next_chromosome() ) {
939 print "$chromosome\n";
941 Returns : String
942 Args : None
944 =cut
946 sub next_chromosome {
947 my ($obj) = @_;
949 return $obj->_next_element("chr","chromosome");
953 =head2 next_protsim
955 Title : next_protsim
956 Usage : next_protsim();
957 Function: Returns the next protsim line from an array referred
958 to using $obj->{'protsim'}
960 If you call this iterator again after it returned undef, it
961 will re-cycle through the list of elements. Changes in the
962 underlying array property while you loop over this iterator
963 will not be reflected until you exhaust the iterator.
965 Example : while ( my $protsim = $in->next_protsim() ) {
966 print "$protsim\n";
968 Returns : String
969 Args : None
971 =cut
973 sub next_protsim {
974 my ($obj) = @_;
976 return $obj->_next_element("protsim","protsim");
980 =head2 next_sts
982 Title : next_sts
983 Usage : next_sts();
984 Function: Returns the next sts line from an array referred
985 to using $obj->{'sts'}
987 If you call this iterator again after it returned undef, it
988 will re-cycle through the list of elements. Changes in the
989 underlying array property while you loop over this iterator
990 will not be reflected until you exhaust the iterator.
992 Example : while ( my $sts = $in->next_sts() ) {
993 print "$sts\n";
995 Returns : String
996 Args : None
998 =cut
1000 sub next_sts {
1001 my ($obj) = @_;
1003 return $obj->_next_element("sts","sts");
1007 =head2 next_txmap
1009 Title : next_txmap
1010 Usage : next_txmap();
1011 Function: Returns the next txmap line from an array
1012 referred to using $obj->{'txmap'}
1014 If you call this iterator again after it returned undef, it
1015 will re-cycle through the list of elements. Changes in the
1016 underlying array property while you loop over this iterator
1017 will not be reflected until you exhaust the iterator.
1019 Example : while ( my $tsmap = $in->next_txmap() ) {
1020 print "$txmap\n";
1022 Returns : String
1023 Args : None
1025 =cut
1027 sub next_txmap {
1028 my ($obj) = @_;
1030 return $obj->_next_element("txmap","txmap");
1033 ###############################
1034 # private method
1036 # args: prefix name for the queue
1037 # name of the method from which to re-fill
1038 # returns: the next element from that queue, or undef if the queue is empty
1039 ###############################
1040 sub _next_element{
1041 my ($self,$queuename,$meth) = @_;
1043 $queuename = "_".$queuename."_queue";
1044 if(! exists($self->{$queuename})) {
1045 # re-initialize from array of sequence data
1046 $self->{$queuename} = [@{$self->$meth() }];
1048 my $queue = $self->{$queuename};
1049 # is queue exhausted (equivalent to end of stream)?
1050 if(! @$queue) {
1051 # yes, remove queue and signal to the caller
1052 delete $self->{$queuename};
1053 return;
1055 return shift(@$queue);
1058 =head1 L<Bio::IdentifiableI> methods
1060 =cut
1062 =head2 object_id
1064 Title : object_id
1065 Usage : $string = $obj->object_id()
1066 Function: a string which represents the stable primary identifier
1067 in this namespace of this object. For DNA sequences this
1068 is its accession_number, similarly for protein sequences
1070 This is aliased to unigene_id().
1072 Returns : A scalar
1075 =cut
1077 sub object_id {
1078 return shift->unigene_id(@_);
1081 =head2 version
1083 Title : version
1084 Usage : $version = $obj->version()
1085 Function: a number which differentiates between versions of
1086 the same object. Higher numbers are considered to be
1087 later and more relevant, but a single object described
1088 the same identifier should represent the same concept
1090 Unigene clusters usually won't have a version, so this
1091 will be mostly undefined.
1093 Returns : A number
1094 Args : on set, new value (a scalar or undef, optional)
1097 =cut
1099 sub version {
1100 my $self = shift;
1102 return $self->{'version'} = shift if @_;
1103 return $self->{'version'};
1107 =head2 authority
1109 Title : authority
1110 Usage : $authority = $obj->authority()
1111 Function: a string which represents the organisation which
1112 granted the namespace, written as the DNS name for
1113 organisation (eg, wormbase.org)
1115 Returns : A scalar
1116 Args : on set, new value (a scalar or undef, optional)
1119 =cut
1121 sub authority {
1122 my $self = shift;
1124 return $self->{'authority'} = shift if @_;
1125 return $self->{'authority'};
1129 =head2 namespace
1131 Title : namespace
1132 Usage : $string = $obj->namespace()
1133 Function: A string representing the name space this identifier
1134 is valid in, often the database name or the name
1135 describing the collection
1137 Returns : A scalar
1138 Args : on set, new value (a scalar or undef, optional)
1141 =cut
1143 sub namespace {
1144 my $self = shift;
1146 return $self->{'namespace'} = shift if @_;
1147 return $self->{'namespace'};
1150 =head1 L<Bio::DescribableI> methods
1152 =cut
1154 =head2 display_name
1156 Title : display_name
1157 Usage : $string = $obj->display_name()
1158 Function: A string which is what should be displayed to the user
1159 the string should have no spaces (ideally, though a cautious
1160 user of this interface would not assumme this) and should be
1161 less than thirty characters (though again, double checking
1162 this is a good idea)
1164 This is aliased to unigene_id().
1166 Returns : A scalar
1167 Status : Virtual
1169 =cut
1171 sub display_name {
1172 return shift->unigene_id(@_);
1176 =head2 description()
1178 Title : description
1179 Usage : $string = $obj->description()
1180 Function: A text string suitable for displaying to the user a
1181 description. This string is likely to have spaces, but
1182 should not have any newlines or formatting - just plain
1183 text. The string should not be greater than 255 characters
1184 and clients can feel justified at truncating strings at 255
1185 characters for the purposes of display
1187 This is already demanded by Bio::ClusterI and hence is
1188 present anyway.
1190 Returns : A scalar
1193 =cut
1196 =head1 L<Bio::Factory::SequenceStreamI> methods
1198 =cut
1200 =head2 next_seq
1202 Title : next_seq
1203 Usage : next_seq();
1204 Function: Returns the next seq as a Seq object as defined by
1205 $seq->sequence_factory(),
1206 at present an empty Bio::Seq::RichSeq object with
1207 just the accession_number() and pid() set
1209 This iterator will not exhaust the array of member
1210 sequences. If you call next_seq() again after it returned
1211 undef, it will re-cycle through the list of member
1212 sequences.
1214 Example : while ( my $sequence = $in->next_seq() ) {
1215 print $sequence->accession_number() . "\n";
1217 Returns : Bio::PrimarySeqI object
1218 Args : None
1220 =cut
1222 sub next_seq {
1223 my ($obj) = @_;
1225 if(! exists($obj->{'_seq_queue'})) {
1226 # re-initialize from array of sequence data
1227 $obj->{'_seq_queue'} = [@{$obj->sequences()}];
1229 my $queue = $obj->{'_seq_queue'};
1230 # is queue exhausted (equivalent to end of stream)?
1231 if(! @$queue) {
1232 # yes, remove queue and signal to the caller
1233 delete $obj->{'_seq_queue'};
1234 return;
1236 # no, still data in the queue: get the next one from the queue
1237 my $seq_h = shift(@$queue);
1238 # if this is not a simple hash ref, it's an object already, and we'll
1239 # return just that
1240 return $seq_h if(ref($seq_h) ne 'HASH');
1241 # nope, we need to assemble this object from scratch
1243 # assemble the annotation collection
1244 my $ac = Bio::Annotation::Collection->new();
1245 foreach my $k (keys %$seq_h) {
1246 next if $k =~ /acc|pid|nid|version/;
1247 my $ann = Bio::Annotation::SimpleValue->new(-tagname => $k,
1248 -value => $seq_h->{$k});
1249 $ac->add_Annotation($ann);
1251 # assemble the initialization parameters and create object
1252 my $seqobj = $obj->sequence_factory->create(
1253 -accession_number => $seq_h->{acc},
1254 -pid => $seq_h->{pid},
1255 # why does NCBI prepend a 'g' to its own identifiers??
1256 -primary_id => $seq_h->{nid} && $seq_h->{nid} =~ /^g\d+$/ ?
1257 substr($seq_h->{nid},1) : $seq_h->{nid},
1258 -display_id => $seq_h->{acc},
1259 -seq_version => $seq_h->{version},
1260 -alphabet => $obj->{'_alphabet'},
1261 -namespace => $seq_h->{acc} =~ /^NM_/ ? 'RefSeq' : 'GenBank',
1262 -authority => $obj->authority(), # default is NCBI
1263 -species => $obj->species(),
1264 -annotation => $ac
1266 return $seqobj;
1269 =head2 sequence_factory
1271 Title : sequence_factory
1272 Usage : $seqio->sequence_factory($seqfactory)
1273 Function: Get/Set the Bio::Factory::SequenceFactoryI
1274 Returns : Bio::Factory::SequenceFactoryI
1275 Args : [optional] Bio::Factory::SequenceFactoryI
1278 =cut
1280 sub sequence_factory {
1281 my ($self,$obj) = @_;
1282 if( defined $obj ) {
1283 if( ! ref($obj) || ! $obj->isa('Bio::Factory::SequenceFactoryI') ) {
1284 $self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)." sequence_factory()");
1286 $self->{'_seqfactory'} = $obj;
1288 $self->{'_seqfactory'};
1291 =head1 Private methods
1293 =cut
1295 =head2 _annotation_value
1297 Title : _annotation_value
1298 Usage :
1299 Function: Private method.
1300 Example :
1301 Returns : the value (a string)
1302 Args : annotation key (a string)
1303 on set, annotation value (a string)
1306 =cut
1308 sub _annotation_value{
1309 my $self = shift;
1310 my $key = shift;
1312 my ($ann, $val);
1313 if(@_) {
1314 $val = shift;
1315 if(! defined($val)) {
1316 ($ann) = $self->annotation->remove_Annotations($key);
1317 return $ann ? $ann->value() : undef;
1320 ($ann) = $self->annotation->get_Annotations($key);
1321 if(defined $ann && (! $val)) {
1322 # get mode and exists
1323 $val = $ann->value();
1324 } elsif($val) {
1325 # set mode
1326 if(!defined $ann) {
1327 $ann = Bio::Annotation::SimpleValue->new(-tagname => $key);
1328 $self->annotation->add_Annotation($ann);
1330 $ann->value($val);
1332 return $val;
1336 =head2 _annotation_value_ary
1338 Title : _annotation_value_ary
1339 Usage :
1340 Function: Private method.
1341 Example :
1342 Returns : reference to the array of values
1343 Args : annotation key (a string)
1344 on set, reference to an array holding the values
1347 =cut
1349 sub _annotation_value_ary{
1350 my ($self,$key,$arr) = @_;
1352 my $ac = $self->annotation;
1353 if($arr) {
1354 # purge first
1355 $ac->remove_Annotations($key);
1356 # then add as many values as are present
1357 foreach my $val (@$arr) {
1358 my $ann = Bio::Annotation::SimpleValue->new(-value => $val,
1359 -tagname => $key
1361 $ac->add_Annotation($ann);
1363 } else {
1364 my @vals = map { $_->value(); } $ac->get_Annotations($key);
1365 $arr = [@vals];
1367 return $arr;
1371 =head2 _annotation_dblink
1373 Title : _annotation_dblink
1374 Usage :
1375 Function: Private method.
1376 Example :
1377 Returns : array of accessions for the given database (namespace)
1378 Args : annotation key (a string)
1379 dbname (a string) (optional on get, mandatory on set)
1380 on set, accession or ID (a string), and version
1383 =cut
1385 sub _annotation_dblink{
1386 my ($self,$key,$dbname,$acc,$version) = @_;
1388 if($acc) {
1389 # set mode -- this is adding here
1390 my $ann = Bio::Annotation::DBLink->new(-tagname => $key,
1391 -primary_id => $acc,
1392 -database => $dbname,
1393 -version => $version);
1394 $self->annotation->add_Annotation($ann);
1395 return 1;
1396 } else {
1397 # get mode
1398 my @anns = $self->annotation->get_Annotations($key);
1399 # filter out those that don't match the requested database
1400 if($dbname) {
1401 @anns = grep { $_->database() eq $dbname; } @anns;
1403 return map { $_->primary_id(); } @anns;
1407 =head2 _remove_dblink
1409 Title : _remove_dblink
1410 Usage :
1411 Function: Private method.
1412 Example :
1413 Returns : array of accessions for the given database (namespace)
1414 Args : annotation key (a string)
1415 dbname (a string) (optional)
1418 =cut
1420 sub _remove_dblink{
1421 my ($self,$key,$dbname) = @_;
1423 my $ac = $self->annotation();
1424 my @anns = ();
1425 if($dbname) {
1426 foreach my $ann ($ac->remove_Annotations($key)) {
1427 if($ann->database() eq $dbname) {
1428 push(@anns, $ann);
1429 } else {
1430 $ac->add_Annotation($ann);
1433 } else {
1434 @anns = $ac->remove_Annotations($key);
1436 return map { $_->primary_id(); } @anns;
1440 #####################################################################
1441 # aliases for naming consistency or other reasons #
1442 #####################################################################
1444 *sequence = \&sequences;