tag fourth (and hopefully last) alpha
[bioperl-live.git] / branch-1-6 / Bio / Cluster / UniGene.pm
blobf9f2e3ca94311569f81a87721608bd235bd96b90
1 # $Id$
3 # BioPerl module for Bio::Cluster::UniGene.pm
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Andrew Macgregor <andrew at cbbc.murdoch.edu.au>
9 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
10 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
11 # http://meg.otago.ac.nz/
13 # You may distribute this module under the same terms as perl itself
15 # _history
16 # April 17, 2002 - Initial implementation by Andrew Macgregor
17 # POD documentation - main docs before the code
19 =head1 NAME
21 Bio::Cluster::UniGene - UniGene object
23 =head1 SYNOPSIS
25 use Bio::Cluster::UniGene;
26 use Bio::ClusterIO;
28 $stream = Bio::ClusterIO->new('-file' => "Hs.data",
29 '-format' => "unigene");
30 # note: we quote -format to keep older perl's from complaining.
32 while ( my $in = $stream->next_cluster() ) {
33 print $in->unigene_id() . "\n";
34 while ( my $sequence = $in->next_seq() ) {
35 print $sequence->accession_number() . "\n";
39 =head1 DESCRIPTION
41 This UniGene object implements the L<Bio::Cluster::UniGeneI> interface
42 for the representation if UniGene clusters in Bioperl. It is returned
43 by the L<Bio::ClusterIO> parser for unigene format and contains all
44 the data associated with one UniGene record.
46 This class implements several interfaces and hence can be used
47 wherever instances of such interfaces are expected. In particular, the
48 interfaces are L<Bio::ClusterI> as the base interface for all cluster
49 representations, and in addition L<Bio::IdentifiableI> and
50 L<Bio::DescribableI>.
52 The following lists the UniGene specific methods that are available
53 (see below for details). Be aware next_XXX iterators take a snapshot
54 of the array property when they are first called, and this snapshot is
55 not reset until the iterator is exhausted. Hence, once called you need
56 to exhaust the iterator to see any changes that have been made to the
57 property in the meantime. You will usually want to use the
58 non-iterator equivalents and loop over the elements yourself.
60 new() - standard new call
62 unigene_id() - set/get unigene_id
64 title() - set/get title (description)
66 gene() - set/get gene
68 cytoband() - set/get cytoband
70 mgi() - set/get mgi
72 locuslink() - set/get locuslink
74 homol() - set/get homologene
76 gnm_terminus() - set/get gnm_terminus
78 scount() - set/get scount
80 express() - set/get express, currently takes/returns a reference to an
81 array of expressed tissues
83 next_express() - returns the next tissue expression from the expressed
84 tissue array
86 chromosome() - set/get chromosome, currently takes/returns a reference
87 to an array of chromosome lines
89 next_chromosome() - returns the next chromosome line from the array of
90 chromosome lines
92 sts() - set/get sts, currently takes/returns a reference to an array
93 of sts lines
95 next_sts() - returns the next sts line from the array of sts lines
97 txmap() - set/get txmap, currently takes/returns a reference to an
98 array of txmap lines
100 next_txmap() - returns the next txmap line from the array of txmap
101 lines
103 protsim() - set/get protsim, currently takes/returns a reference to an
104 array of protsim lines
106 next_protsim() - returns the next protsim line from the array of
107 protsim lines
109 sequences() - set/get sequence, currently takes/returns a reference to
110 an array of references to seq info
112 next_seq() - returns a Seq object that currently only contains an
113 accession number
116 =head1 Implemented Interfaces
118 This class implementes the following interfaces.
120 =over 4
122 =item Bio::Cluster::UniGeneI
124 This includes implementing Bio::ClusterI.
126 =item Bio::IdentifiableI
128 =item Bio::DescribableI
130 =item Bio::AnnotatableI
132 =item Bio::Factory::SequenceStreamI
134 =back
136 =head1 FEEDBACK
139 =head2 Mailing Lists
141 User feedback is an integral part of the evolution of this and other
142 Bioperl modules. Send your comments and suggestions preferably to one
143 of the Bioperl mailing lists. Your participation is much appreciated.
145 bioperl-l@bioperl.org - General discussion
146 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
148 =head2 Support
150 Please direct usage questions or support issues to the mailing list:
152 I<bioperl-l@bioperl.org>
154 rather than to the module maintainer directly. Many experienced and
155 reponsive experts will be able look at the problem and quickly
156 address it. Please include a thorough description of the problem
157 with code and data examples if at all possible.
159 =head2 Reporting Bugs
161 Report bugs to the Bioperl bug tracking system to help us keep track
162 the bugs and their resolution. Bug reports can be submitted via the
163 web:
165 http://bugzilla.open-bio.org/
167 =head1 AUTHOR - Andrew Macgregor
169 Email andrew at cbbc.murdoch.edu.au
171 =head1 CONTRIBUTORS
173 Hilmar Lapp, hlapp at gmx.net
175 =head1 APPENDIX
178 The rest of the documentation details each of the object
179 methods. Internal methods are usually preceded with a "_".
181 =cut
183 # Let the code begin...
186 package Bio::Cluster::UniGene;
187 use strict;
189 use Bio::Annotation::Collection;
190 use Bio::Annotation::DBLink;
191 use Bio::Annotation::SimpleValue;
192 use Bio::Species;
193 use Bio::Seq::SeqFactory;
195 use base qw(Bio::Root::Root Bio::Cluster::UniGeneI Bio::IdentifiableI Bio::DescribableI Bio::AnnotatableI Bio::Factory::SequenceStreamI);
197 my %species_map = (
198 'Aga' => "Anopheles gambiae",
199 'Ame' => "Apis mellifera",
200 'At' => "Arabidopsis thaliana",
201 'Bmo' => "Bombyx mori",
202 'Bt' => "Bos taurus",
203 'Cel' => "Caenorhabditis elegans",
204 'Cfa' => "Canine familiaris",
205 'Cin' => "Ciona intestinalis",
206 'Cre' => "Chlamydomonas reinhardtii",
207 'Csa' => "Ciona savignyi",
208 'Csi' => "Citrus sinensis",
209 'Ddi' => "Dictyostelium discoideum",
210 'Dr' => "Danio rerio",
211 'Dm' => "Drosophila melanogaster",
212 'Gga' => "Gallus gallus",
213 'Gma' => "Glycine max",
214 'Han' => "Helianthus annus",
215 'Hs' => "Homo sapiens",
216 'Hma' => "Hydra magnipapillata",
217 'Hv' => "Hordeum vulgare",
218 'Lco' => "Lotus corniculatus",
219 'Les' => "Lycopersicon esculentum",
220 'Lsa' => "Lactuca sativa",
221 'Mdo' => "Malus x domestica",
222 'Mgr' => "Magnaporthe grisea",
223 'Mm' => "Mus musculus",
224 'Mtr' => "Medicago truncatula",
225 'Ncr' => "Neurospora crassa",
226 'Oar' => "Ovis aries",
227 'Omy' => "Oncorhynchus mykiss",
228 'Os' => "Oryza sativa",
229 'Ola' => "Oryzias latipes",
230 'Ppa' => "Physcomitrella patens",
231 'Pta' => "Pinus taeda",
232 'Ptp' => "Populus tremula x Populus tremuloides",
233 'Rn' => "Rattus norvegicus",
234 'Sbi' => "Sorghum bicolor",
235 'Sma' => "Schistosoma mansoni",
236 'Sof' => "Saccharum officinarum",
237 'Spu' => "Strongylocentrotus purpuratus",
238 'Ssa' => "Salmo salar",
239 'Ssc' => "Sus scrofa",
240 'Str' => "Xenopus tropicalis",
241 'Stu' => "Solanum tuberosum",
242 'Ta' => "Triticum aestivum",
243 'Tgo' => "Toxoplasma gondii",
244 'Tru' => "Takifugu rubripes",
245 'Vvi' => "Vitis vinifera",
246 'Xl' => "Xenopus laevis",
247 'Zm' => "Zea mays",
251 =head2 new
253 Title : new
254 Usage : used by ClusterIO
255 Returns : a new Bio::Cluster::Unigene object
257 =cut
259 sub new {
260 # standard new call..
261 my($caller,@args) = @_;
262 my $self = $caller->SUPER::new(@args);
264 my ($ugid,$desc,$mems,$size,$species,$dispid,$id,$ns,$auth,$v,$seqfact) =
265 $self->_rearrange([qw(UNIGENE_ID
266 DESCRIPTION
267 MEMBERS
268 SIZE
269 SPECIES
270 DISPLAY_ID
271 OBJECT_ID
272 NAMESPACE
273 AUTHORITY
274 VERSION
275 SEQFACTORY
276 )], @args);
278 $self->{'_alphabet'} = 'dna';
280 $self->unigene_id($ugid) if $ugid;
281 $self->description($desc) if $desc;
282 $self->sequences($mems) if $mems;
283 $self->size($size) if defined($size);
284 $self->display_id($dispid) if $dispid; # overwrites ugid
285 $self->object_id($id) if $id; # overwrites dispid
286 $self->namespace($ns || 'UniGene');
287 $self->authority($auth || 'NCBI');
288 $self->version($v) if defined($v);
289 if( ! defined $seqfact ) {
290 $seqfact = Bio::Seq::SeqFactory->new
291 (-verbose => $self->verbose(),
292 -type => 'Bio::Seq::RichSeq');
294 $self->sequence_factory($seqfact);
295 if( (! $species) && (defined $self->unigene_id() &&
296 $self->unigene_id() =~ /^([A-Za-z]+)\.[0-9]/)) {
297 # try set a default one depending on the ID
298 $species = $species_map{$1};
300 $self->species($species);
301 return $self;
305 =head1 L<Bio::Cluster::UniGeneI> methods
307 =cut
309 =head2 unigene_id
311 Title : unigene_id
312 Usage : unigene_id();
313 Function: Returns the unigene_id associated with the object.
314 Example : $id = $unigene->unigene_id or $unigene->unigene_id($id)
315 Returns : A string
316 Args : None or an id
319 =cut
321 sub unigene_id {
322 my ($obj,$value) = @_;
323 if( defined $value) {
324 $obj->{'unigene_id'} = $value;
326 return $obj->{'unigene_id'};
331 =head2 title
333 Title : title
334 Usage : title();
335 Function: Returns the title associated with the object.
336 Example : $title = $unigene->title or $unigene->title($title)
337 Returns : A string
338 Args : None or a title
341 =cut
343 sub title {
344 my ($obj,$value) = @_;
345 if( defined $value) {
346 $obj->{'title'} = $value;
348 return $obj->{'title'};
352 =head2 gene
354 Title : gene
355 Usage : gene();
356 Function: Returns the gene associated with the object.
357 Example : $gene = $unigene->gene or $unigene->gene($gene)
358 Returns : A string
359 Args : None or a gene
362 =cut
364 sub gene {
365 my $self = shift;
366 return $self->_annotation_value('gene_name', @_);
370 =head2 cytoband
372 Title : cytoband
373 Usage : cytoband();
374 Function: Returns the cytoband associated with the object.
375 Example : $cytoband = $unigene->cytoband or $unigene->cytoband($cytoband)
376 Returns : A string
377 Args : None or a cytoband
380 =cut
382 sub cytoband {
383 my $self = shift;
384 return $self->_annotation_value('cyto_band', @_);
387 =head2 mgi
389 Title : mgi
390 Usage : mgi();
391 Function: Returns the mgi associated with the object.
392 Example : $mgi = $unigene->mgi or $unigene->mgi($mgi)
393 Returns : A string
394 Args : None or a mgi
397 =cut
399 sub mgi {
400 my $self = shift;
401 my $acc;
403 if(@_) {
404 # purge first
405 $self->_remove_dblink('dblink','MGI');
406 # then add if a valid value is present
407 if($acc = shift) {
408 $self->_annotation_dblink('dblink','MGI',$acc);
410 } else {
411 ($acc) = $self->_annotation_dblink('dblink','MGI');
413 return $acc;
417 =head2 locuslink
419 Title : locuslink
420 Usage : locuslink();
421 Function: Returns or stores a reference to an array containing locuslink data.
422 Returns : An array reference
423 Args : None or an array reference
425 =cut
427 sub locuslink {
428 my ($self,$ll) = @_;
430 if($ll) {
431 # purge first
432 $self->_remove_dblink('dblink','LocusLink');
433 # then add as many accessions as are present
434 foreach my $acc (@$ll) {
435 $self->_annotation_dblink('dblink','LocusLink',$acc);
437 } else {
438 my @accs = $self->_annotation_dblink('dblink','LocusLink');
439 $ll = [@accs];
441 return $ll;
445 =head2 homol
447 Title : homol
448 Usage : homol();
449 Function: Returns the homol entry associated with the object.
450 Example : $homol = $unigene->homol or $unigene->homol($homol)
451 Returns : A string
452 Args : None or a homol entry
454 =cut
456 sub homol {
457 my $self = shift;
458 return $self->_annotation_value('homol', @_);
462 =head2 restr_expr
464 Title : restr_expr
465 Usage : restr_expr();
466 Function: Returns the restr_expr entry associated with the object.
467 Example : $restr_expr = $unigene->restr_expr or $unigene->restr_expr($restr_expr)
468 Returns : A string
469 Args : None or a restr_expr entry
471 =cut
473 sub restr_expr {
474 my $self = shift;
475 return $self->_annotation_value('restr_expr', @_);
479 =head2 gnm_terminus
481 Title : gnm_terminus
482 Usage : gnm_terminus();
483 Function: Returns the gnm_terminus associated with the object.
484 Example : $gnm_terminus = $unigene->gnm_terminus or
485 $unigene->gnm_terminus($gnm_terminus)
486 Returns : A string
487 Args : None or a gnm_terminus
489 =cut
491 sub gnm_terminus {
492 my $self = shift;
493 return $self->_annotation_value('gnm_terminus', @_);
496 =head2 scount
498 Title : scount
499 Usage : scount();
500 Function: Returns the scount associated with the object.
501 Example : $scount = $unigene->scount or $unigene->scount($scount)
502 Returns : A string
503 Args : None or a scount
505 =cut
507 sub scount {
508 my ($obj,$value) = @_;
509 if( defined $value) {
510 $obj->{'scount'} = $value;
511 } elsif((! defined($obj->{'scount'})) && defined($obj->sequences())) {
512 $obj->{'scount'} = $obj->size();
514 return $obj->{'scount'};
518 =head2 express
520 Title : express
521 Usage : express();
522 Function: Returns or stores a reference to an array containing
523 tissue expression data
524 Returns : An array reference
525 Args : None or an array reference
527 =cut
529 sub express {
530 my $self = shift;
532 return $self->_annotation_value_ary('expressed',@_);
536 =head2 chromosome
538 Title : chromosome
539 Usage : chromosome();
540 Function: Returns or stores a reference to an array containing
541 chromosome lines
542 Returns : An array reference
543 Args : None or an array reference
545 =cut
547 sub chromosome {
548 my $self = shift;
550 return $self->_annotation_value_ary('chromosome',@_);
554 =head2 sts
556 Title : sts
557 Usage : sts();
558 Function: Returns or stores a reference to an array containing sts lines
560 Returns : An array reference
561 Args : None or an array reference
563 =cut
565 sub sts {
566 my $self = shift;
568 return $self->_annotation_value_ary('sts',@_);
572 =head2 txmap
574 Title : txmap
575 Usage : txmap();
576 Function: Returns or stores a reference to an array containing txmap lines
578 Returns : An array reference
579 Args : None or an array reference
581 =cut
583 sub txmap {
584 my $self = shift;
586 return $self->_annotation_value_ary('txmap',@_);
590 =head2 protsim
592 Title : protsim
593 Usage : protsim();
594 Function: Returns or stores a reference to an array containing protsim lines
595 This should really only be used by ClusterIO, not directly
596 Returns : An array reference
597 Args : None or an array reference
599 =cut
601 sub protsim {
602 my $self = shift;
604 return $self->_annotation_value_ary('protsim',@_);
608 =head2 sequences
610 Title : sequences
611 Usage : sequences();
612 Function: Returns or stores a reference to an array containing
613 sequence data.
615 This is mostly reserved for ClusterIO parsers. You should
616 use get_members() for get and add_member()/remove_members()
617 for set.
619 Returns : An array reference, or undef
620 Args : None or an array reference or undef
622 =cut
624 sub sequences {
625 my $self = shift;
627 return $self->{'members'} = shift if @_;
628 return $self->{'members'};
631 =head2 species
633 Title : species
634 Usage : $obj->species($newval)
635 Function: Get/set the species object for this Unigene cluster.
636 Example :
637 Returns : value of species (a L<Bio::Species> object)
638 Args : on set, new value (a L<Bio::Species> object or
639 the binomial name, or undef, optional)
642 =cut
644 sub species{
645 my $self = shift;
647 if(@_) {
648 my $species = shift;
649 if($species && (! ref($species))) {
650 my @class = reverse(split(' ',$species));
651 $species = Bio::Species->new(-classification => \@class);
653 return $self->{'species'} = $species;
655 return $self->{'species'};
659 =head1 L<Bio::ClusterI> methods
661 =cut
663 =head2 display_id
665 Title : display_id
666 Usage :
667 Function: Get/set the display name or identifier for the cluster
669 This is aliased to unigene_id().
671 Returns : a string
672 Args : optional, on set the display ID ( a string)
674 =cut
676 sub display_id{
677 return shift->unigene_id(@_);
680 =head2 description
682 Title : description
683 Usage : Bio::ClusterI->description("POLYUBIQUITIN")
684 Function: get/set for the consensus description of the cluster
686 This is aliased to title().
688 Returns : the description string
689 Args : Optional the description string
691 =cut
693 sub description{
694 return shift->title(@_);
697 =head2 size
699 Title : size
700 Usage : Bio::ClusterI->size();
701 Function: get for the size of the family,
702 calculated from the number of members
704 This is aliased to scount().
706 Returns : the size of the cluster
707 Args :
709 =cut
711 sub size {
712 my $self = shift;
714 # hard-wiring the size is allowed if there are no sequences
715 return $self->scount(@_) unless defined($self->sequences());
716 # but we can't change the number of members through this method
717 my $n = scalar(@{$self->sequences()});
718 if(@_ && ($n != $_[0])) {
719 $self->throw("Cannot change cluster size using size() from $n to ".
720 $_[0]);
722 return $n;
725 =head2 cluster_score
727 Title : cluster_score
728 Usage : $cluster ->cluster_score(100);
729 Function: get/set for cluster_score which
730 represent the score in which the clustering
731 algorithm assigns to this cluster.
733 For UniGene clusters, there really is no cluster score that
734 would come with the data. However, we provide an
735 implementation here so that you can score UniGene clusters
736 if you want to.
738 Returns : a number
739 Args : optionally, on set a number
741 =cut
743 sub cluster_score{
744 my $self = shift;
746 return $self->{'cluster_score'} = shift if @_;
747 return $self->{'cluster_score'};
750 =head2 get_members
752 Title : get_members
753 Usage : Bio::ClusterI->get_members(($seq1, $seq2));
754 Function: retrieve the members of the family by some criteria
756 Will return all members if no criteria are provided.
758 At this time this implementation does not support
759 specifying criteria and will always return all members.
761 Returns : the array of members
762 Args :
764 =cut
766 sub get_members {
767 my $self = shift;
769 my $mems = $self->sequences() || [];
770 # already objects?
771 if(@$mems && (ref($mems->[0]) eq "HASH")) {
772 # nope, we need to build the object list from scratch
773 my @memlist = ();
774 while(my $seq = $self->next_seq()) {
775 push(@memlist, $seq);
777 # we cache this array of objects as the new member list
778 $mems = \@memlist;
779 $self->sequences($mems);
781 # done
782 return @$mems;
786 =head1 Annotatable view at the object properties
788 =cut
790 =head2 annotation
792 Title : annotation
793 Usage : $obj->annotation($newval)
794 Function: Get/set the L<Bio::AnnotationCollectionI> object for
795 this UniGene cluster.
797 Many attributes of this class are actually stored within
798 the annotation collection object as L<Bio::AnnotationI>
799 compliant objects, so you can conveniently access them
800 through the same interface as you would e.g. access
801 L<Bio::SeqI> annotation properties.
803 If you call this method in set mode and replace the
804 annotation collection with another one you should know
805 exactly what you are doing.
807 Example :
808 Returns : a L<Bio::AnnotationCollectionI> compliant object
809 Args : on set, new value (a L<Bio::AnnotationCollectionI>
810 compliant object or undef, optional)
813 =cut
815 sub annotation{
816 my $self = shift;
818 if(@_) {
819 return $self->{'annotation'} = shift;
820 } elsif(! exists($self->{'annotation'})) {
821 $self->{'annotation'} = Bio::Annotation::Collection->new();
823 return $self->{'annotation'};
827 =head1 Implementation specific methods
829 These are mostly for adding/removing to array properties, and for
830 methods with special functionality.
832 =cut
834 =head2 add_member
836 Title : add_member
837 Usage :
838 Function: Adds a member object to the list of members.
839 Example :
840 Returns : TRUE if the new member was successfuly added, and FALSE
841 otherwise.
842 Args : The member to add.
845 =cut
847 sub add_member{
848 my ($self,@mems) = @_;
850 my $memlist = $self->{'members'} || [];
851 # this is an object interface; is the member list already objects?
852 if(@$memlist && (ref($memlist->[0]) eq "HASH")) {
853 # nope, convert to objects
854 $memlist = [$self->get_members()];
856 # add new member(s)
857 push(@$memlist, @mems);
858 # store if we created this array ref ourselves
859 $self->sequences($memlist);
860 # done
861 return 1;
864 =head2 remove_members
866 Title : remove_members
867 Usage :
868 Function: Remove the list of members for this cluster such that the
869 member list is undefined afterwards (as opposed to zero members).
870 Example :
871 Returns : the previous list of members
872 Args : none
875 =cut
877 sub remove_members{
878 my $self = shift;
880 my @mems = $self->get_members();
881 $self->sequences(undef);
882 return @mems;
886 =head2 next_locuslink
888 Title : next_locuslink
889 Usage : next_locuslink();
890 Function: Returns the next locuslink from an array referred
891 to using $obj->{'locuslink'}
893 If you call this iterator again after it returned undef, it
894 will re-cycle through the list of elements. Changes in the
895 underlying array property while you loop over this iterator
896 will not be reflected until you exhaust the iterator.
898 Example : while ( my $locuslink = $in->next_locuslink() ) {
899 print "$locuslink\n";
901 Returns : String
902 Args : None
904 =cut
906 sub next_locuslink {
907 my ($obj) = @_;
909 return $obj->_next_element("ll","locuslink");
912 =head2 next_express
914 Title : next_express
915 Usage : next_express();
916 Function: Returns the next tissue from an array referred
917 to using $obj->{'express'}
919 If you call this iterator again after it returned undef, it
920 will re-cycle through the list of elements. Changes in the
921 underlying array property while you loop over this iterator
922 will not be reflected until you exhaust the iterator.
924 Example : while ( my $express = $in->next_express() ) {
925 print "$express\n";
927 Returns : String
928 Args : None
930 =cut
932 sub next_express {
933 my ($obj) = @_;
935 return $obj->_next_element("express","express");
939 =head2 next_chromosome
941 Title : next_chromosome
942 Usage : next_chromosome();
943 Function: Returns the next chromosome line from an array referred
944 to using $obj->{'chromosome'}
946 If you call this iterator again after it returned undef, it
947 will re-cycle through the list of elements. Changes in the
948 underlying array property while you loop over this iterator
949 will not be reflected until you exhaust the iterator.
951 Example : while ( my $chromosome = $in->next_chromosome() ) {
952 print "$chromosome\n";
954 Returns : String
955 Args : None
957 =cut
959 sub next_chromosome {
960 my ($obj) = @_;
962 return $obj->_next_element("chr","chromosome");
966 =head2 next_protsim
968 Title : next_protsim
969 Usage : next_protsim();
970 Function: Returns the next protsim line from an array referred
971 to using $obj->{'protsim'}
973 If you call this iterator again after it returned undef, it
974 will re-cycle through the list of elements. Changes in the
975 underlying array property while you loop over this iterator
976 will not be reflected until you exhaust the iterator.
978 Example : while ( my $protsim = $in->next_protsim() ) {
979 print "$protsim\n";
981 Returns : String
982 Args : None
984 =cut
986 sub next_protsim {
987 my ($obj) = @_;
989 return $obj->_next_element("protsim","protsim");
993 =head2 next_sts
995 Title : next_sts
996 Usage : next_sts();
997 Function: Returns the next sts line from an array referred
998 to using $obj->{'sts'}
1000 If you call this iterator again after it returned undef, it
1001 will re-cycle through the list of elements. Changes in the
1002 underlying array property while you loop over this iterator
1003 will not be reflected until you exhaust the iterator.
1005 Example : while ( my $sts = $in->next_sts() ) {
1006 print "$sts\n";
1008 Returns : String
1009 Args : None
1011 =cut
1013 sub next_sts {
1014 my ($obj) = @_;
1016 return $obj->_next_element("sts","sts");
1020 =head2 next_txmap
1022 Title : next_txmap
1023 Usage : next_txmap();
1024 Function: Returns the next txmap line from an array
1025 referred to using $obj->{'txmap'}
1027 If you call this iterator again after it returned undef, it
1028 will re-cycle through the list of elements. Changes in the
1029 underlying array property while you loop over this iterator
1030 will not be reflected until you exhaust the iterator.
1032 Example : while ( my $tsmap = $in->next_txmap() ) {
1033 print "$txmap\n";
1035 Returns : String
1036 Args : None
1038 =cut
1040 sub next_txmap {
1041 my ($obj) = @_;
1043 return $obj->_next_element("txmap","txmap");
1046 ###############################
1047 # private method
1049 # args: prefix name for the queue
1050 # name of the method from which to re-fill
1051 # returns: the next element from that queue, or undef if the queue is empty
1052 ###############################
1053 sub _next_element{
1054 my ($self,$queuename,$meth) = @_;
1056 $queuename = "_".$queuename."_queue";
1057 if(! exists($self->{$queuename})) {
1058 # re-initialize from array of sequence data
1059 $self->{$queuename} = [@{$self->$meth() }];
1061 my $queue = $self->{$queuename};
1062 # is queue exhausted (equivalent to end of stream)?
1063 if(! @$queue) {
1064 # yes, remove queue and signal to the caller
1065 delete $self->{$queuename};
1066 return;
1068 return shift(@$queue);
1071 =head1 L<Bio::IdentifiableI> methods
1073 =cut
1075 =head2 object_id
1077 Title : object_id
1078 Usage : $string = $obj->object_id()
1079 Function: a string which represents the stable primary identifier
1080 in this namespace of this object. For DNA sequences this
1081 is its accession_number, similarly for protein sequences
1083 This is aliased to unigene_id().
1085 Returns : A scalar
1088 =cut
1090 sub object_id {
1091 return shift->unigene_id(@_);
1094 =head2 version
1096 Title : version
1097 Usage : $version = $obj->version()
1098 Function: a number which differentiates between versions of
1099 the same object. Higher numbers are considered to be
1100 later and more relevant, but a single object described
1101 the same identifier should represent the same concept
1103 Unigene clusters usually won't have a version, so this
1104 will be mostly undefined.
1106 Returns : A number
1107 Args : on set, new value (a scalar or undef, optional)
1110 =cut
1112 sub version {
1113 my $self = shift;
1115 return $self->{'version'} = shift if @_;
1116 return $self->{'version'};
1120 =head2 authority
1122 Title : authority
1123 Usage : $authority = $obj->authority()
1124 Function: a string which represents the organisation which
1125 granted the namespace, written as the DNS name for
1126 organisation (eg, wormbase.org)
1128 Returns : A scalar
1129 Args : on set, new value (a scalar or undef, optional)
1132 =cut
1134 sub authority {
1135 my $self = shift;
1137 return $self->{'authority'} = shift if @_;
1138 return $self->{'authority'};
1142 =head2 namespace
1144 Title : namespace
1145 Usage : $string = $obj->namespace()
1146 Function: A string representing the name space this identifier
1147 is valid in, often the database name or the name
1148 describing the collection
1150 Returns : A scalar
1151 Args : on set, new value (a scalar or undef, optional)
1154 =cut
1156 sub namespace {
1157 my $self = shift;
1159 return $self->{'namespace'} = shift if @_;
1160 return $self->{'namespace'};
1163 =head1 L<Bio::DescribableI> methods
1165 =cut
1167 =head2 display_name
1169 Title : display_name
1170 Usage : $string = $obj->display_name()
1171 Function: A string which is what should be displayed to the user
1172 the string should have no spaces (ideally, though a cautious
1173 user of this interface would not assumme this) and should be
1174 less than thirty characters (though again, double checking
1175 this is a good idea)
1177 This is aliased to unigene_id().
1179 Returns : A scalar
1180 Status : Virtual
1182 =cut
1184 sub display_name {
1185 return shift->unigene_id(@_);
1189 =head2 description()
1191 Title : description
1192 Usage : $string = $obj->description()
1193 Function: A text string suitable for displaying to the user a
1194 description. This string is likely to have spaces, but
1195 should not have any newlines or formatting - just plain
1196 text. The string should not be greater than 255 characters
1197 and clients can feel justified at truncating strings at 255
1198 characters for the purposes of display
1200 This is already demanded by Bio::ClusterI and hence is
1201 present anyway.
1203 Returns : A scalar
1206 =cut
1209 =head1 L<Bio::Factory::SequenceStreamI> methods
1211 =cut
1213 =head2 next_seq
1215 Title : next_seq
1216 Usage : next_seq();
1217 Function: Returns the next seq as a Seq object as defined by
1218 $seq->sequence_factory(),
1219 at present an empty Bio::Seq::RichSeq object with
1220 just the accession_number() and pid() set
1222 This iterator will not exhaust the array of member
1223 sequences. If you call next_seq() again after it returned
1224 undef, it will re-cycle through the list of member
1225 sequences.
1227 Example : while ( my $sequence = $in->next_seq() ) {
1228 print $sequence->accession_number() . "\n";
1230 Returns : Bio::PrimarySeqI object
1231 Args : None
1233 =cut
1235 sub next_seq {
1236 my ($obj) = @_;
1238 if(! exists($obj->{'_seq_queue'})) {
1239 # re-initialize from array of sequence data
1240 $obj->{'_seq_queue'} = [@{$obj->sequences()}];
1242 my $queue = $obj->{'_seq_queue'};
1243 # is queue exhausted (equivalent to end of stream)?
1244 if(! @$queue) {
1245 # yes, remove queue and signal to the caller
1246 delete $obj->{'_seq_queue'};
1247 return;
1249 # no, still data in the queue: get the next one from the queue
1250 my $seq_h = shift(@$queue);
1251 # if this is not a simple hash ref, it's an object already, and we'll
1252 # return just that
1253 return $seq_h if(ref($seq_h) ne 'HASH');
1254 # nope, we need to assemble this object from scratch
1256 # assemble the annotation collection
1257 my $ac = Bio::Annotation::Collection->new();
1258 foreach my $k (keys %$seq_h) {
1259 next if $k =~ /acc|pid|nid|version/;
1260 my $ann = Bio::Annotation::SimpleValue->new(-tagname => $k,
1261 -value => $seq_h->{$k});
1262 $ac->add_Annotation($ann);
1264 # assemble the initialization parameters and create object
1265 my $seqobj = $obj->sequence_factory->create(
1266 -accession_number => $seq_h->{acc},
1267 -pid => $seq_h->{pid},
1268 # why does NCBI prepend a 'g' to its own identifiers??
1269 -primary_id => $seq_h->{nid} && $seq_h->{nid} =~ /^g\d+$/ ?
1270 substr($seq_h->{nid},1) : $seq_h->{nid},
1271 -display_id => $seq_h->{acc},
1272 -seq_version => $seq_h->{version},
1273 -alphabet => $obj->{'_alphabet'},
1274 -namespace => $seq_h->{acc} =~ /^NM_/ ? 'RefSeq' : 'GenBank',
1275 -authority => $obj->authority(), # default is NCBI
1276 -species => $obj->species(),
1277 -annotation => $ac
1279 return $seqobj;
1282 =head2 sequence_factory
1284 Title : sequence_factory
1285 Usage : $seqio->sequence_factory($seqfactory)
1286 Function: Get/Set the Bio::Factory::SequenceFactoryI
1287 Returns : Bio::Factory::SequenceFactoryI
1288 Args : [optional] Bio::Factory::SequenceFactoryI
1291 =cut
1293 sub sequence_factory {
1294 my ($self,$obj) = @_;
1295 if( defined $obj ) {
1296 if( ! ref($obj) || ! $obj->isa('Bio::Factory::SequenceFactoryI') ) {
1297 $self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)." sequence_factory()");
1299 $self->{'_seqfactory'} = $obj;
1301 $self->{'_seqfactory'};
1304 =head1 Private methods
1306 =cut
1308 =head2 _annotation_value
1310 Title : _annotation_value
1311 Usage :
1312 Function: Private method.
1313 Example :
1314 Returns : the value (a string)
1315 Args : annotation key (a string)
1316 on set, annotation value (a string)
1319 =cut
1321 sub _annotation_value{
1322 my $self = shift;
1323 my $key = shift;
1325 my ($ann, $val);
1326 if(@_) {
1327 $val = shift;
1328 if(! defined($val)) {
1329 ($ann) = $self->annotation->remove_Annotations($key);
1330 return $ann ? $ann->value() : undef;
1333 ($ann) = $self->annotation->get_Annotations($key);
1334 if(defined $ann && (! $val)) {
1335 # get mode and exists
1336 $val = $ann->value();
1337 } elsif($val) {
1338 # set mode
1339 if(!defined $ann) {
1340 $ann = Bio::Annotation::SimpleValue->new(-tagname => $key);
1341 $self->annotation->add_Annotation($ann);
1343 $ann->value($val);
1345 return $val;
1349 =head2 _annotation_value_ary
1351 Title : _annotation_value_ary
1352 Usage :
1353 Function: Private method.
1354 Example :
1355 Returns : reference to the array of values
1356 Args : annotation key (a string)
1357 on set, reference to an array holding the values
1360 =cut
1362 sub _annotation_value_ary{
1363 my ($self,$key,$arr) = @_;
1365 my $ac = $self->annotation;
1366 if($arr) {
1367 # purge first
1368 $ac->remove_Annotations($key);
1369 # then add as many values as are present
1370 foreach my $val (@$arr) {
1371 my $ann = Bio::Annotation::SimpleValue->new(-value => $val,
1372 -tagname => $key
1374 $ac->add_Annotation($ann);
1376 } else {
1377 my @vals = map { $_->value(); } $ac->get_Annotations($key);
1378 $arr = [@vals];
1380 return $arr;
1384 =head2 _annotation_dblink
1386 Title : _annotation_dblink
1387 Usage :
1388 Function: Private method.
1389 Example :
1390 Returns : array of accessions for the given database (namespace)
1391 Args : annotation key (a string)
1392 dbname (a string) (optional on get, mandatory on set)
1393 on set, accession or ID (a string), and version
1396 =cut
1398 sub _annotation_dblink{
1399 my ($self,$key,$dbname,$acc,$version) = @_;
1401 if($acc) {
1402 # set mode -- this is adding here
1403 my $ann = Bio::Annotation::DBLink->new(-tagname => $key,
1404 -primary_id => $acc,
1405 -database => $dbname,
1406 -version => $version);
1407 $self->annotation->add_Annotation($ann);
1408 return 1;
1409 } else {
1410 # get mode
1411 my @anns = $self->annotation->get_Annotations($key);
1412 # filter out those that don't match the requested database
1413 if($dbname) {
1414 @anns = grep { $_->database() eq $dbname; } @anns;
1416 return map { $_->primary_id(); } @anns;
1420 =head2 _remove_dblink
1422 Title : _remove_dblink
1423 Usage :
1424 Function: Private method.
1425 Example :
1426 Returns : array of accessions for the given database (namespace)
1427 Args : annotation key (a string)
1428 dbname (a string) (optional)
1431 =cut
1433 sub _remove_dblink{
1434 my ($self,$key,$dbname) = @_;
1436 my $ac = $self->annotation();
1437 my @anns = ();
1438 if($dbname) {
1439 foreach my $ann ($ac->remove_Annotations($key)) {
1440 if($ann->database() eq $dbname) {
1441 push(@anns, $ann);
1442 } else {
1443 $ac->add_Annotation($ann);
1446 } else {
1447 @anns = $ac->remove_Annotations($key);
1449 return map { $_->primary_id(); } @anns;
1453 #####################################################################
1454 # aliases for naming consistency or other reasons #
1455 #####################################################################
1457 *sequence = \&sequences;