Sync README and README.md
[bioperl-live.git] / Bio / Cluster / UniGene.pm
blob1fedd353a8ff40a18163236d9a23bce3a52a57cc
2 # BioPerl module for Bio::Cluster::UniGene.pm
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Andrew Macgregor <andrew at cbbc.murdoch.edu.au>
8 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
9 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
10 # http://meg.otago.ac.nz/
12 # You may distribute this module under the same terms as perl itself
14 # _history
15 # April 17, 2002 - Initial implementation by Andrew Macgregor
16 # POD documentation - main docs before the code
18 =head1 NAME
20 Bio::Cluster::UniGene - UniGene object
22 =head1 SYNOPSIS
24 use Bio::Cluster::UniGene;
25 use Bio::ClusterIO;
27 $stream = Bio::ClusterIO->new('-file' => "Hs.data",
28 '-format' => "unigene");
29 # note: we quote -format to keep older perl's from complaining.
31 while ( my $in = $stream->next_cluster() ) {
32 print $in->unigene_id() . "\n";
33 while ( my $sequence = $in->next_seq() ) {
34 print $sequence->accession_number() . "\n";
38 =head1 DESCRIPTION
40 This UniGene object implements the L<Bio::Cluster::UniGeneI> interface
41 for the representation if UniGene clusters in Bioperl. It is returned
42 by the L<Bio::ClusterIO> parser for unigene format and contains all
43 the data associated with one UniGene record.
45 This class implements several interfaces and hence can be used
46 wherever instances of such interfaces are expected. In particular, the
47 interfaces are L<Bio::ClusterI> as the base interface for all cluster
48 representations, and in addition L<Bio::IdentifiableI> and
49 L<Bio::DescribableI>.
51 The following lists the UniGene specific methods that are available
52 (see below for details). Be aware next_XXX iterators take a snapshot
53 of the array property when they are first called, and this snapshot is
54 not reset until the iterator is exhausted. Hence, once called you need
55 to exhaust the iterator to see any changes that have been made to the
56 property in the meantime. You will usually want to use the
57 non-iterator equivalents and loop over the elements yourself.
59 new() - standard new call
61 unigene_id() - set/get unigene_id
63 title() - set/get title (description)
65 gene() - set/get gene
67 cytoband() - set/get cytoband
69 mgi() - set/get mgi
71 locuslink() - set/get locuslink
73 homol() - set/get homologene
75 gnm_terminus() - set/get gnm_terminus
77 scount() - set/get scount
79 express() - set/get express, currently takes/returns a reference to an
80 array of expressed tissues
82 next_express() - returns the next tissue expression from the expressed
83 tissue array
85 chromosome() - set/get chromosome, currently takes/returns a reference
86 to an array of chromosome lines
88 next_chromosome() - returns the next chromosome line from the array of
89 chromosome lines
91 sts() - set/get sts, currently takes/returns a reference to an array
92 of sts lines
94 next_sts() - returns the next sts line from the array of sts lines
96 txmap() - set/get txmap, currently takes/returns a reference to an
97 array of txmap lines
99 next_txmap() - returns the next txmap line from the array of txmap
100 lines
102 protsim() - set/get protsim, currently takes/returns a reference to an
103 array of protsim lines
105 next_protsim() - returns the next protsim line from the array of
106 protsim lines
108 sequences() - set/get sequence, currently takes/returns a reference to
109 an array of references to seq info
111 next_seq() - returns a Seq object that currently only contains an
112 accession number
115 =head1 Implemented Interfaces
117 This class implementes the following interfaces.
119 =over 4
121 =item Bio::Cluster::UniGeneI
123 This includes implementing Bio::ClusterI.
125 =item Bio::IdentifiableI
127 =item Bio::DescribableI
129 =item Bio::AnnotatableI
131 =item Bio::Factory::SequenceStreamI
133 =back
135 =head1 FEEDBACK
138 =head2 Mailing Lists
140 User feedback is an integral part of the evolution of this and other
141 Bioperl modules. Send your comments and suggestions preferably to one
142 of the Bioperl mailing lists. Your participation is much appreciated.
144 bioperl-l@bioperl.org - General discussion
145 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
147 =head2 Support
149 Please direct usage questions or support issues to the mailing list:
151 I<bioperl-l@bioperl.org>
153 rather than to the module maintainer directly. Many experienced and
154 reponsive experts will be able look at the problem and quickly
155 address it. Please include a thorough description of the problem
156 with code and data examples if at all possible.
158 =head2 Reporting Bugs
160 Report bugs to the Bioperl bug tracking system to help us keep track
161 the bugs and their resolution. Bug reports can be submitted via the
162 web:
164 https://github.com/bioperl/bioperl-live/issues
166 =head1 AUTHOR - Andrew Macgregor
168 Email andrew at cbbc.murdoch.edu.au
170 =head1 CONTRIBUTORS
172 Hilmar Lapp, hlapp at gmx.net
174 =head1 APPENDIX
177 The rest of the documentation details each of the object
178 methods. Internal methods are usually preceded with a "_".
180 =cut
182 # Let the code begin...
185 package Bio::Cluster::UniGene;
186 use strict;
188 use Bio::Annotation::Collection;
189 use Bio::Annotation::DBLink;
190 use Bio::Annotation::SimpleValue;
191 use Bio::Species;
192 use Bio::Seq::SeqFactory;
194 use base qw(Bio::Root::Root Bio::Cluster::UniGeneI Bio::IdentifiableI Bio::DescribableI Bio::AnnotatableI Bio::Factory::SequenceStreamI);
196 my %species_map = (
197 'Aga' => "Anopheles gambiae",
198 'Ame' => "Apis mellifera",
199 'At' => "Arabidopsis thaliana",
200 'Bmo' => "Bombyx mori",
201 'Bt' => "Bos taurus",
202 'Cel' => "Caenorhabditis elegans",
203 'Cfa' => "Canine familiaris",
204 'Cin' => "Ciona intestinalis",
205 'Cre' => "Chlamydomonas reinhardtii",
206 'Csa' => "Ciona savignyi",
207 'Csi' => "Citrus sinensis",
208 'Ddi' => "Dictyostelium discoideum",
209 'Dr' => "Danio rerio",
210 'Dm' => "Drosophila melanogaster",
211 'Gga' => "Gallus gallus",
212 'Gma' => "Glycine max",
213 'Han' => "Helianthus annus",
214 'Hs' => "Homo sapiens",
215 'Hma' => "Hydra magnipapillata",
216 'Hv' => "Hordeum vulgare",
217 'Lco' => "Lotus corniculatus",
218 'Les' => "Lycopersicon esculentum",
219 'Lsa' => "Lactuca sativa",
220 'Mdo' => "Malus x domestica",
221 'Mgr' => "Magnaporthe grisea",
222 'Mm' => "Mus musculus",
223 'Mtr' => "Medicago truncatula",
224 'Ncr' => "Neurospora crassa",
225 'Oar' => "Ovis aries",
226 'Omy' => "Oncorhynchus mykiss",
227 'Os' => "Oryza sativa",
228 'Ola' => "Oryzias latipes",
229 'Ppa' => "Physcomitrella patens",
230 'Pta' => "Pinus taeda",
231 'Ptp' => "Populus tremula x Populus tremuloides",
232 'Rn' => "Rattus norvegicus",
233 'Sbi' => "Sorghum bicolor",
234 'Sma' => "Schistosoma mansoni",
235 'Sof' => "Saccharum officinarum",
236 'Spu' => "Strongylocentrotus purpuratus",
237 'Ssa' => "Salmo salar",
238 'Ssc' => "Sus scrofa",
239 'Str' => "Xenopus tropicalis",
240 'Stu' => "Solanum tuberosum",
241 'Ta' => "Triticum aestivum",
242 'Tgo' => "Toxoplasma gondii",
243 'Tru' => "Takifugu rubripes",
244 'Vvi' => "Vitis vinifera",
245 'Xl' => "Xenopus laevis",
246 'Zm' => "Zea mays",
250 =head2 new
252 Title : new
253 Usage : used by ClusterIO
254 Returns : a new Bio::Cluster::Unigene object
256 =cut
258 sub new {
259 # standard new call..
260 my($caller,@args) = @_;
261 my $self = $caller->SUPER::new(@args);
263 my ($ugid,$desc,$mems,$size,$species,$dispid,$id,$ns,$auth,$v,$seqfact) =
264 $self->_rearrange([qw(UNIGENE_ID
265 DESCRIPTION
266 MEMBERS
267 SIZE
268 SPECIES
269 DISPLAY_ID
270 OBJECT_ID
271 NAMESPACE
272 AUTHORITY
273 VERSION
274 SEQFACTORY
275 )], @args);
277 $self->{'_alphabet'} = 'dna';
279 $self->unigene_id($ugid) if $ugid;
280 $self->description($desc) if $desc;
281 $self->sequences($mems) if $mems;
282 $self->size($size) if defined($size);
283 $self->display_id($dispid) if $dispid; # overwrites ugid
284 $self->object_id($id) if $id; # overwrites dispid
285 $self->namespace($ns || 'UniGene');
286 $self->authority($auth || 'NCBI');
287 $self->version($v) if defined($v);
288 if( ! defined $seqfact ) {
289 $seqfact = Bio::Seq::SeqFactory->new
290 (-verbose => $self->verbose(),
291 -type => 'Bio::Seq::RichSeq');
293 $self->sequence_factory($seqfact);
294 if( (! $species) && (defined $self->unigene_id() &&
295 $self->unigene_id() =~ /^([A-Za-z]+)\.[0-9]/)) {
296 # try set a default one depending on the ID
297 $species = $species_map{$1};
299 $self->species($species);
300 return $self;
304 =head1 L<Bio::Cluster::UniGeneI> methods
306 =cut
308 =head2 unigene_id
310 Title : unigene_id
311 Usage : unigene_id();
312 Function: Returns the unigene_id associated with the object.
313 Example : $id = $unigene->unigene_id or $unigene->unigene_id($id)
314 Returns : A string
315 Args : None or an id
318 =cut
320 sub unigene_id {
321 my ($obj,$value) = @_;
322 if( defined $value) {
323 $obj->{'unigene_id'} = $value;
325 return $obj->{'unigene_id'};
330 =head2 title
332 Title : title
333 Usage : title();
334 Function: Returns the title associated with the object.
335 Example : $title = $unigene->title or $unigene->title($title)
336 Returns : A string
337 Args : None or a title
340 =cut
342 sub title {
343 my ($obj,$value) = @_;
344 if( defined $value) {
345 $obj->{'title'} = $value;
347 return $obj->{'title'};
351 =head2 gene
353 Title : gene
354 Usage : gene();
355 Function: Returns the gene associated with the object.
356 Example : $gene = $unigene->gene or $unigene->gene($gene)
357 Returns : A string
358 Args : None or a gene
361 =cut
363 sub gene {
364 my $self = shift;
365 return $self->_annotation_value('gene_name', @_);
369 =head2 cytoband
371 Title : cytoband
372 Usage : cytoband();
373 Function: Returns the cytoband associated with the object.
374 Example : $cytoband = $unigene->cytoband or $unigene->cytoband($cytoband)
375 Returns : A string
376 Args : None or a cytoband
379 =cut
381 sub cytoband {
382 my $self = shift;
383 return $self->_annotation_value('cyto_band', @_);
386 =head2 mgi
388 Title : mgi
389 Usage : mgi();
390 Function: Returns the mgi associated with the object.
391 Example : $mgi = $unigene->mgi or $unigene->mgi($mgi)
392 Returns : A string
393 Args : None or a mgi
396 =cut
398 sub mgi {
399 my $self = shift;
400 my $acc;
402 if(@_) {
403 # purge first
404 $self->_remove_dblink('dblink','MGI');
405 # then add if a valid value is present
406 if($acc = shift) {
407 $self->_annotation_dblink('dblink','MGI',$acc);
409 } else {
410 ($acc) = $self->_annotation_dblink('dblink','MGI');
412 return $acc;
416 =head2 locuslink
418 Title : locuslink
419 Usage : locuslink();
420 Function: Returns or stores a reference to an array containing locuslink data.
421 Returns : An array reference
422 Args : None or an array reference
424 =cut
426 sub locuslink {
427 my ($self,$ll) = @_;
429 if($ll) {
430 # purge first
431 $self->_remove_dblink('dblink','LocusLink');
432 # then add as many accessions as are present
433 foreach my $acc (@$ll) {
434 $self->_annotation_dblink('dblink','LocusLink',$acc);
436 } else {
437 my @accs = $self->_annotation_dblink('dblink','LocusLink');
438 $ll = [@accs];
440 return $ll;
444 =head2 homol
446 Title : homol
447 Usage : homol();
448 Function: Returns the homol entry associated with the object.
449 Example : $homol = $unigene->homol or $unigene->homol($homol)
450 Returns : A string
451 Args : None or a homol entry
453 =cut
455 sub homol {
456 my $self = shift;
457 return $self->_annotation_value('homol', @_);
461 =head2 restr_expr
463 Title : restr_expr
464 Usage : restr_expr();
465 Function: Returns the restr_expr entry associated with the object.
466 Example : $restr_expr = $unigene->restr_expr or $unigene->restr_expr($restr_expr)
467 Returns : A string
468 Args : None or a restr_expr entry
470 =cut
472 sub restr_expr {
473 my $self = shift;
474 return $self->_annotation_value('restr_expr', @_);
478 =head2 gnm_terminus
480 Title : gnm_terminus
481 Usage : gnm_terminus();
482 Function: Returns the gnm_terminus associated with the object.
483 Example : $gnm_terminus = $unigene->gnm_terminus or
484 $unigene->gnm_terminus($gnm_terminus)
485 Returns : A string
486 Args : None or a gnm_terminus
488 =cut
490 sub gnm_terminus {
491 my $self = shift;
492 return $self->_annotation_value('gnm_terminus', @_);
495 =head2 scount
497 Title : scount
498 Usage : scount();
499 Function: Returns the scount associated with the object.
500 Example : $scount = $unigene->scount or $unigene->scount($scount)
501 Returns : A string
502 Args : None or a scount
504 =cut
506 sub scount {
507 my ($obj,$value) = @_;
508 if( defined $value) {
509 $obj->{'scount'} = $value;
510 } elsif((! defined($obj->{'scount'})) && defined($obj->sequences())) {
511 $obj->{'scount'} = $obj->size();
513 return $obj->{'scount'};
517 =head2 express
519 Title : express
520 Usage : express();
521 Function: Returns or stores a reference to an array containing
522 tissue expression data
523 Returns : An array reference
524 Args : None or an array reference
526 =cut
528 sub express {
529 my $self = shift;
531 return $self->_annotation_value_ary('expressed',@_);
535 =head2 chromosome
537 Title : chromosome
538 Usage : chromosome();
539 Function: Returns or stores a reference to an array containing
540 chromosome lines
541 Returns : An array reference
542 Args : None or an array reference
544 =cut
546 sub chromosome {
547 my $self = shift;
549 return $self->_annotation_value_ary('chromosome',@_);
553 =head2 sts
555 Title : sts
556 Usage : sts();
557 Function: Returns or stores a reference to an array containing sts lines
559 Returns : An array reference
560 Args : None or an array reference
562 =cut
564 sub sts {
565 my $self = shift;
567 return $self->_annotation_value_ary('sts',@_);
571 =head2 txmap
573 Title : txmap
574 Usage : txmap();
575 Function: Returns or stores a reference to an array containing txmap lines
577 Returns : An array reference
578 Args : None or an array reference
580 =cut
582 sub txmap {
583 my $self = shift;
585 return $self->_annotation_value_ary('txmap',@_);
589 =head2 protsim
591 Title : protsim
592 Usage : protsim();
593 Function: Returns or stores a reference to an array containing protsim lines
594 This should really only be used by ClusterIO, not directly
595 Returns : An array reference
596 Args : None or an array reference
598 =cut
600 sub protsim {
601 my $self = shift;
603 return $self->_annotation_value_ary('protsim',@_);
607 =head2 sequences
609 Title : sequences
610 Usage : sequences();
611 Function: Returns or stores a reference to an array containing
612 sequence data.
614 This is mostly reserved for ClusterIO parsers. You should
615 use get_members() for get and add_member()/remove_members()
616 for set.
618 Returns : An array reference, or undef
619 Args : None or an array reference or undef
621 =cut
623 sub sequences {
624 my $self = shift;
626 return $self->{'members'} = shift if @_;
627 return $self->{'members'};
630 =head2 species
632 Title : species
633 Usage : $obj->species($newval)
634 Function: Get/set the species object for this Unigene cluster.
635 Example :
636 Returns : value of species (a L<Bio::Species> object)
637 Args : on set, new value (a L<Bio::Species> object or
638 the binomial name, or undef, optional)
641 =cut
643 sub species{
644 my $self = shift;
646 if(@_) {
647 my $species = shift;
648 if($species && (! ref($species))) {
649 my @class = reverse(split(' ',$species));
650 $species = Bio::Species->new(-classification => \@class);
652 return $self->{'species'} = $species;
654 return $self->{'species'};
658 =head1 L<Bio::ClusterI> methods
660 =cut
662 =head2 display_id
664 Title : display_id
665 Usage :
666 Function: Get/set the display name or identifier for the cluster
668 This is aliased to unigene_id().
670 Returns : a string
671 Args : optional, on set the display ID ( a string)
673 =cut
675 sub display_id{
676 return shift->unigene_id(@_);
679 =head2 description
681 Title : description
682 Usage : Bio::ClusterI->description("POLYUBIQUITIN")
683 Function: get/set for the consensus description of the cluster
685 This is aliased to title().
687 Returns : the description string
688 Args : Optional the description string
690 =cut
692 sub description{
693 return shift->title(@_);
696 =head2 size
698 Title : size
699 Usage : Bio::ClusterI->size();
700 Function: get for the size of the family,
701 calculated from the number of members
703 This is aliased to scount().
705 Returns : the size of the cluster
706 Args :
708 =cut
710 sub size {
711 my $self = shift;
713 # hard-wiring the size is allowed if there are no sequences
714 return $self->scount(@_) unless defined($self->sequences());
715 # but we can't change the number of members through this method
716 my $n = scalar(@{$self->sequences()});
717 if(@_ && ($n != $_[0])) {
718 $self->throw("Cannot change cluster size using size() from $n to ".
719 $_[0]);
721 return $n;
724 =head2 cluster_score
726 Title : cluster_score
727 Usage : $cluster ->cluster_score(100);
728 Function: get/set for cluster_score which
729 represent the score in which the clustering
730 algorithm assigns to this cluster.
732 For UniGene clusters, there really is no cluster score that
733 would come with the data. However, we provide an
734 implementation here so that you can score UniGene clusters
735 if you want to.
737 Returns : a number
738 Args : optionally, on set a number
740 =cut
742 sub cluster_score{
743 my $self = shift;
745 return $self->{'cluster_score'} = shift if @_;
746 return $self->{'cluster_score'};
749 =head2 get_members
751 Title : get_members
752 Usage : Bio::ClusterI->get_members(($seq1, $seq2));
753 Function: retrieve the members of the family by some criteria
755 Will return all members if no criteria are provided.
757 At this time this implementation does not support
758 specifying criteria and will always return all members.
760 Returns : the array of members
761 Args :
763 =cut
765 sub get_members {
766 my $self = shift;
768 my $mems = $self->sequences() || [];
769 # already objects?
770 if(@$mems && (ref($mems->[0]) eq "HASH")) {
771 # nope, we need to build the object list from scratch
772 my @memlist = ();
773 while(my $seq = $self->next_seq()) {
774 push(@memlist, $seq);
776 # we cache this array of objects as the new member list
777 $mems = \@memlist;
778 $self->sequences($mems);
780 # done
781 return @$mems;
785 =head1 Annotatable view at the object properties
787 =cut
789 =head2 annotation
791 Title : annotation
792 Usage : $obj->annotation($newval)
793 Function: Get/set the L<Bio::AnnotationCollectionI> object for
794 this UniGene cluster.
796 Many attributes of this class are actually stored within
797 the annotation collection object as L<Bio::AnnotationI>
798 compliant objects, so you can conveniently access them
799 through the same interface as you would e.g. access
800 L<Bio::SeqI> annotation properties.
802 If you call this method in set mode and replace the
803 annotation collection with another one you should know
804 exactly what you are doing.
806 Example :
807 Returns : a L<Bio::AnnotationCollectionI> compliant object
808 Args : on set, new value (a L<Bio::AnnotationCollectionI>
809 compliant object or undef, optional)
812 =cut
814 sub annotation{
815 my $self = shift;
817 if(@_) {
818 return $self->{'annotation'} = shift;
819 } elsif(! exists($self->{'annotation'})) {
820 $self->{'annotation'} = Bio::Annotation::Collection->new();
822 return $self->{'annotation'};
826 =head1 Implementation specific methods
828 These are mostly for adding/removing to array properties, and for
829 methods with special functionality.
831 =cut
833 =head2 add_member
835 Title : add_member
836 Usage :
837 Function: Adds a member object to the list of members.
838 Example :
839 Returns : TRUE if the new member was successfuly added, and FALSE
840 otherwise.
841 Args : The member to add.
844 =cut
846 sub add_member{
847 my ($self,@mems) = @_;
849 my $memlist = $self->{'members'} || [];
850 # this is an object interface; is the member list already objects?
851 if(@$memlist && (ref($memlist->[0]) eq "HASH")) {
852 # nope, convert to objects
853 $memlist = [$self->get_members()];
855 # add new member(s)
856 push(@$memlist, @mems);
857 # store if we created this array ref ourselves
858 $self->sequences($memlist);
859 # done
860 return 1;
863 =head2 remove_members
865 Title : remove_members
866 Usage :
867 Function: Remove the list of members for this cluster such that the
868 member list is undefined afterwards (as opposed to zero members).
869 Example :
870 Returns : the previous list of members
871 Args : none
874 =cut
876 sub remove_members{
877 my $self = shift;
879 my @mems = $self->get_members();
880 $self->sequences(undef);
881 return @mems;
885 =head2 next_locuslink
887 Title : next_locuslink
888 Usage : next_locuslink();
889 Function: Returns the next locuslink from an array referred
890 to using $obj->{'locuslink'}
892 If you call this iterator again after it returned undef, it
893 will re-cycle through the list of elements. Changes in the
894 underlying array property while you loop over this iterator
895 will not be reflected until you exhaust the iterator.
897 Example : while ( my $locuslink = $in->next_locuslink() ) {
898 print "$locuslink\n";
900 Returns : String
901 Args : None
903 =cut
905 sub next_locuslink {
906 my ($obj) = @_;
908 return $obj->_next_element("ll","locuslink");
911 =head2 next_express
913 Title : next_express
914 Usage : next_express();
915 Function: Returns the next tissue from an array referred
916 to using $obj->{'express'}
918 If you call this iterator again after it returned undef, it
919 will re-cycle through the list of elements. Changes in the
920 underlying array property while you loop over this iterator
921 will not be reflected until you exhaust the iterator.
923 Example : while ( my $express = $in->next_express() ) {
924 print "$express\n";
926 Returns : String
927 Args : None
929 =cut
931 sub next_express {
932 my ($obj) = @_;
934 return $obj->_next_element("express","express");
938 =head2 next_chromosome
940 Title : next_chromosome
941 Usage : next_chromosome();
942 Function: Returns the next chromosome line from an array referred
943 to using $obj->{'chromosome'}
945 If you call this iterator again after it returned undef, it
946 will re-cycle through the list of elements. Changes in the
947 underlying array property while you loop over this iterator
948 will not be reflected until you exhaust the iterator.
950 Example : while ( my $chromosome = $in->next_chromosome() ) {
951 print "$chromosome\n";
953 Returns : String
954 Args : None
956 =cut
958 sub next_chromosome {
959 my ($obj) = @_;
961 return $obj->_next_element("chr","chromosome");
965 =head2 next_protsim
967 Title : next_protsim
968 Usage : next_protsim();
969 Function: Returns the next protsim line from an array referred
970 to using $obj->{'protsim'}
972 If you call this iterator again after it returned undef, it
973 will re-cycle through the list of elements. Changes in the
974 underlying array property while you loop over this iterator
975 will not be reflected until you exhaust the iterator.
977 Example : while ( my $protsim = $in->next_protsim() ) {
978 print "$protsim\n";
980 Returns : String
981 Args : None
983 =cut
985 sub next_protsim {
986 my ($obj) = @_;
988 return $obj->_next_element("protsim","protsim");
992 =head2 next_sts
994 Title : next_sts
995 Usage : next_sts();
996 Function: Returns the next sts line from an array referred
997 to using $obj->{'sts'}
999 If you call this iterator again after it returned undef, it
1000 will re-cycle through the list of elements. Changes in the
1001 underlying array property while you loop over this iterator
1002 will not be reflected until you exhaust the iterator.
1004 Example : while ( my $sts = $in->next_sts() ) {
1005 print "$sts\n";
1007 Returns : String
1008 Args : None
1010 =cut
1012 sub next_sts {
1013 my ($obj) = @_;
1015 return $obj->_next_element("sts","sts");
1019 =head2 next_txmap
1021 Title : next_txmap
1022 Usage : next_txmap();
1023 Function: Returns the next txmap line from an array
1024 referred to using $obj->{'txmap'}
1026 If you call this iterator again after it returned undef, it
1027 will re-cycle through the list of elements. Changes in the
1028 underlying array property while you loop over this iterator
1029 will not be reflected until you exhaust the iterator.
1031 Example : while ( my $tsmap = $in->next_txmap() ) {
1032 print "$txmap\n";
1034 Returns : String
1035 Args : None
1037 =cut
1039 sub next_txmap {
1040 my ($obj) = @_;
1042 return $obj->_next_element("txmap","txmap");
1045 ###############################
1046 # private method
1048 # args: prefix name for the queue
1049 # name of the method from which to re-fill
1050 # returns: the next element from that queue, or undef if the queue is empty
1051 ###############################
1052 sub _next_element{
1053 my ($self,$queuename,$meth) = @_;
1055 $queuename = "_".$queuename."_queue";
1056 if(! exists($self->{$queuename})) {
1057 # re-initialize from array of sequence data
1058 $self->{$queuename} = [@{$self->$meth() }];
1060 my $queue = $self->{$queuename};
1061 # is queue exhausted (equivalent to end of stream)?
1062 if(! @$queue) {
1063 # yes, remove queue and signal to the caller
1064 delete $self->{$queuename};
1065 return;
1067 return shift(@$queue);
1070 =head1 L<Bio::IdentifiableI> methods
1072 =cut
1074 =head2 object_id
1076 Title : object_id
1077 Usage : $string = $obj->object_id()
1078 Function: a string which represents the stable primary identifier
1079 in this namespace of this object. For DNA sequences this
1080 is its accession_number, similarly for protein sequences
1082 This is aliased to unigene_id().
1084 Returns : A scalar
1087 =cut
1089 sub object_id {
1090 return shift->unigene_id(@_);
1093 =head2 version
1095 Title : version
1096 Usage : $version = $obj->version()
1097 Function: a number which differentiates between versions of
1098 the same object. Higher numbers are considered to be
1099 later and more relevant, but a single object described
1100 the same identifier should represent the same concept
1102 Unigene clusters usually won't have a version, so this
1103 will be mostly undefined.
1105 Returns : A number
1106 Args : on set, new value (a scalar or undef, optional)
1109 =cut
1111 sub version {
1112 my $self = shift;
1114 return $self->{'version'} = shift if @_;
1115 return $self->{'version'};
1119 =head2 authority
1121 Title : authority
1122 Usage : $authority = $obj->authority()
1123 Function: a string which represents the organisation which
1124 granted the namespace, written as the DNS name for
1125 organisation (eg, wormbase.org)
1127 Returns : A scalar
1128 Args : on set, new value (a scalar or undef, optional)
1131 =cut
1133 sub authority {
1134 my $self = shift;
1136 return $self->{'authority'} = shift if @_;
1137 return $self->{'authority'};
1141 =head2 namespace
1143 Title : namespace
1144 Usage : $string = $obj->namespace()
1145 Function: A string representing the name space this identifier
1146 is valid in, often the database name or the name
1147 describing the collection
1149 Returns : A scalar
1150 Args : on set, new value (a scalar or undef, optional)
1153 =cut
1155 sub namespace {
1156 my $self = shift;
1158 return $self->{'namespace'} = shift if @_;
1159 return $self->{'namespace'};
1162 =head1 L<Bio::DescribableI> methods
1164 =cut
1166 =head2 display_name
1168 Title : display_name
1169 Usage : $string = $obj->display_name()
1170 Function: A string which is what should be displayed to the user
1171 the string should have no spaces (ideally, though a cautious
1172 user of this interface would not assumme this) and should be
1173 less than thirty characters (though again, double checking
1174 this is a good idea)
1176 This is aliased to unigene_id().
1178 Returns : A scalar
1179 Status : Virtual
1181 =cut
1183 sub display_name {
1184 return shift->unigene_id(@_);
1188 =head2 description()
1190 Title : description
1191 Usage : $string = $obj->description()
1192 Function: A text string suitable for displaying to the user a
1193 description. This string is likely to have spaces, but
1194 should not have any newlines or formatting - just plain
1195 text. The string should not be greater than 255 characters
1196 and clients can feel justified at truncating strings at 255
1197 characters for the purposes of display
1199 This is already demanded by Bio::ClusterI and hence is
1200 present anyway.
1202 Returns : A scalar
1205 =cut
1208 =head1 L<Bio::Factory::SequenceStreamI> methods
1210 =cut
1212 =head2 next_seq
1214 Title : next_seq
1215 Usage : next_seq();
1216 Function: Returns the next seq as a Seq object as defined by
1217 $seq->sequence_factory(),
1218 at present an empty Bio::Seq::RichSeq object with
1219 just the accession_number() and pid() set
1221 This iterator will not exhaust the array of member
1222 sequences. If you call next_seq() again after it returned
1223 undef, it will re-cycle through the list of member
1224 sequences.
1226 Example : while ( my $sequence = $in->next_seq() ) {
1227 print $sequence->accession_number() . "\n";
1229 Returns : Bio::PrimarySeqI object
1230 Args : None
1232 =cut
1234 sub next_seq {
1235 my ($obj) = @_;
1237 if(! exists($obj->{'_seq_queue'})) {
1238 # re-initialize from array of sequence data
1239 $obj->{'_seq_queue'} = [@{$obj->sequences()}];
1241 my $queue = $obj->{'_seq_queue'};
1242 # is queue exhausted (equivalent to end of stream)?
1243 if(! @$queue) {
1244 # yes, remove queue and signal to the caller
1245 delete $obj->{'_seq_queue'};
1246 return;
1248 # no, still data in the queue: get the next one from the queue
1249 my $seq_h = shift(@$queue);
1250 # if this is not a simple hash ref, it's an object already, and we'll
1251 # return just that
1252 return $seq_h if(ref($seq_h) ne 'HASH');
1253 # nope, we need to assemble this object from scratch
1255 # assemble the annotation collection
1256 my $ac = Bio::Annotation::Collection->new();
1257 foreach my $k (keys %$seq_h) {
1258 next if $k =~ /acc|pid|nid|version/;
1259 my $ann = Bio::Annotation::SimpleValue->new(-tagname => $k,
1260 -value => $seq_h->{$k});
1261 $ac->add_Annotation($ann);
1263 # assemble the initialization parameters and create object
1264 my $seqobj = $obj->sequence_factory->create(
1265 -accession_number => $seq_h->{acc},
1266 -pid => $seq_h->{pid},
1267 # why does NCBI prepend a 'g' to its own identifiers??
1268 -primary_id => $seq_h->{nid} && $seq_h->{nid} =~ /^g\d+$/ ?
1269 substr($seq_h->{nid},1) : $seq_h->{nid},
1270 -display_id => $seq_h->{acc},
1271 -seq_version => $seq_h->{version},
1272 -alphabet => $obj->{'_alphabet'},
1273 -namespace => $seq_h->{acc} =~ /^NM_/ ? 'RefSeq' : 'GenBank',
1274 -authority => $obj->authority(), # default is NCBI
1275 -species => $obj->species(),
1276 -annotation => $ac
1278 return $seqobj;
1281 =head2 sequence_factory
1283 Title : sequence_factory
1284 Usage : $seqio->sequence_factory($seqfactory)
1285 Function: Get/Set the Bio::Factory::SequenceFactoryI
1286 Returns : Bio::Factory::SequenceFactoryI
1287 Args : [optional] Bio::Factory::SequenceFactoryI
1290 =cut
1292 sub sequence_factory {
1293 my ($self,$obj) = @_;
1294 if( defined $obj ) {
1295 if( ! ref($obj) || ! $obj->isa('Bio::Factory::SequenceFactoryI') ) {
1296 $self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)." sequence_factory()");
1298 $self->{'_seqfactory'} = $obj;
1300 $self->{'_seqfactory'};
1303 =head1 Private methods
1305 =cut
1307 =head2 _annotation_value
1309 Title : _annotation_value
1310 Usage :
1311 Function: Private method.
1312 Example :
1313 Returns : the value (a string)
1314 Args : annotation key (a string)
1315 on set, annotation value (a string)
1318 =cut
1320 sub _annotation_value{
1321 my $self = shift;
1322 my $key = shift;
1324 my ($ann, $val);
1325 if(@_) {
1326 $val = shift;
1327 if(! defined($val)) {
1328 ($ann) = $self->annotation->remove_Annotations($key);
1329 return $ann ? $ann->value() : undef;
1332 ($ann) = $self->annotation->get_Annotations($key);
1333 if(defined $ann && (! $val)) {
1334 # get mode and exists
1335 $val = $ann->value();
1336 } elsif($val) {
1337 # set mode
1338 if(!defined $ann) {
1339 $ann = Bio::Annotation::SimpleValue->new(-tagname => $key);
1340 $self->annotation->add_Annotation($ann);
1342 $ann->value($val);
1344 return $val;
1348 =head2 _annotation_value_ary
1350 Title : _annotation_value_ary
1351 Usage :
1352 Function: Private method.
1353 Example :
1354 Returns : reference to the array of values
1355 Args : annotation key (a string)
1356 on set, reference to an array holding the values
1359 =cut
1361 sub _annotation_value_ary{
1362 my ($self,$key,$arr) = @_;
1364 my $ac = $self->annotation;
1365 if($arr) {
1366 # purge first
1367 $ac->remove_Annotations($key);
1368 # then add as many values as are present
1369 foreach my $val (@$arr) {
1370 my $ann = Bio::Annotation::SimpleValue->new(-value => $val,
1371 -tagname => $key
1373 $ac->add_Annotation($ann);
1375 } else {
1376 my @vals = map { $_->value(); } $ac->get_Annotations($key);
1377 $arr = [@vals];
1379 return $arr;
1383 =head2 _annotation_dblink
1385 Title : _annotation_dblink
1386 Usage :
1387 Function: Private method.
1388 Example :
1389 Returns : array of accessions for the given database (namespace)
1390 Args : annotation key (a string)
1391 dbname (a string) (optional on get, mandatory on set)
1392 on set, accession or ID (a string), and version
1395 =cut
1397 sub _annotation_dblink{
1398 my ($self,$key,$dbname,$acc,$version) = @_;
1400 if($acc) {
1401 # set mode -- this is adding here
1402 my $ann = Bio::Annotation::DBLink->new(-tagname => $key,
1403 -primary_id => $acc,
1404 -database => $dbname,
1405 -version => $version);
1406 $self->annotation->add_Annotation($ann);
1407 return 1;
1408 } else {
1409 # get mode
1410 my @anns = $self->annotation->get_Annotations($key);
1411 # filter out those that don't match the requested database
1412 if($dbname) {
1413 @anns = grep { $_->database() eq $dbname; } @anns;
1415 return map { $_->primary_id(); } @anns;
1419 =head2 _remove_dblink
1421 Title : _remove_dblink
1422 Usage :
1423 Function: Private method.
1424 Example :
1425 Returns : array of accessions for the given database (namespace)
1426 Args : annotation key (a string)
1427 dbname (a string) (optional)
1430 =cut
1432 sub _remove_dblink{
1433 my ($self,$key,$dbname) = @_;
1435 my $ac = $self->annotation();
1436 my @anns = ();
1437 if($dbname) {
1438 foreach my $ann ($ac->remove_Annotations($key)) {
1439 if($ann->database() eq $dbname) {
1440 push(@anns, $ann);
1441 } else {
1442 $ac->add_Annotation($ann);
1445 } else {
1446 @anns = $ac->remove_Annotations($key);
1448 return map { $_->primary_id(); } @anns;
1452 #####################################################################
1453 # aliases for naming consistency or other reasons #
1454 #####################################################################
1456 *sequence = \&sequences;