3 # BioPerl module for Bio::Cluster::UniGene.pm
5 # Cared for by Andrew Macgregor <andrew at cbbc.murdoch.edu.au>
7 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
8 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
9 # http://meg.otago.ac.nz/
11 # You may distribute this module under the same terms as perl itself
14 # April 17, 2002 - Initial implementation by Andrew Macgregor
15 # POD documentation - main docs before the code
19 Bio::Cluster::UniGene - UniGene object
23 use Bio::Cluster::UniGene;
26 $stream = Bio::ClusterIO->new('-file' => "Hs.data",
27 '-format' => "unigene");
28 # note: we quote -format to keep older perl's from complaining.
30 while ( my $in = $stream->next_cluster() ) {
31 print $in->unigene_id() . "\n";
32 while ( my $sequence = $in->next_seq() ) {
33 print $sequence->accession_number() . "\n";
39 This UniGene object implements the L<Bio::Cluster::UniGeneI> interface
40 for the representation if UniGene clusters in Bioperl. It is returned
41 by the L<Bio::ClusterIO> parser for unigene format and contains all
42 the data associated with one UniGene record.
44 This class implements several interfaces and hence can be used
45 wherever instances of such interfaces are expected. In particular, the
46 interfaces are L<Bio::ClusterI> as the base interface for all cluster
47 representations, and in addition L<Bio::IdentifiableI> and
50 The following lists the UniGene specific methods that are available
51 (see below for details). Be aware next_XXX iterators take a snapshot
52 of the array property when they are first called, and this snapshot is
53 not reset until the iterator is exhausted. Hence, once called you need
54 to exhaust the iterator to see any changes that have been made to the
55 property in the meantime. You will usually want to use the
56 non-iterator equivalents and loop over the elements yourself.
58 new() - standard new call
60 unigene_id() - set/get unigene_id
62 title() - set/get title (description)
66 cytoband() - set/get cytoband
70 locuslink() - set/get locuslink
72 homol() - set/get homologene
74 gnm_terminus() - set/get gnm_terminus
76 scount() - set/get scount
78 express() - set/get express, currently takes/returns a reference to an
79 array of expressed tissues
81 next_express() - returns the next tissue expression from the expressed
84 chromosome() - set/get chromosome, currently takes/returns a reference
85 to an array of chromosome lines
87 next_chromosome() - returns the next chromosome line from the array of
90 sts() - set/get sts, currently takes/returns a reference to an array
93 next_sts() - returns the next sts line from the array of sts lines
95 txmap() - set/get txmap, currently takes/returns a reference to an
98 next_txmap() - returns the next txmap line from the array of txmap
101 protsim() - set/get protsim, currently takes/returns a reference to an
102 array of protsim lines
104 next_protsim() - returns the next protsim line from the array of
107 sequences() - set/get sequence, currently takes/returns a reference to
108 an array of references to seq info
110 next_seq() - returns a Seq object that currently only contains an
114 =head1 Implemented Interfaces
116 This class implementes the following interfaces.
120 =item Bio::Cluster::UniGeneI
122 This includes implementing Bio::ClusterI.
124 =item Bio::IdentifiableI
126 =item Bio::DescribableI
128 =item Bio::AnnotatableI
130 =item Bio::Factory::SequenceStreamI
139 User feedback is an integral part of the evolution of this and other
140 Bioperl modules. Send your comments and suggestions preferably to one
141 of the Bioperl mailing lists. Your participation is much appreciated.
143 bioperl-l@bioperl.org - General discussion
144 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
146 =head2 Reporting Bugs
148 Report bugs to the Bioperl bug tracking system to help us keep track
149 the bugs and their resolution. Bug reports can be submitted via the
152 http://bugzilla.open-bio.org/
154 =head1 AUTHOR - Andrew Macgregor
156 Email andrew at cbbc.murdoch.edu.au
160 Hilmar Lapp, hlapp at gmx.net
165 The rest of the documentation details each of the object
166 methods. Internal methods are usually preceded with a "_".
170 # Let the code begin...
173 package Bio
::Cluster
::UniGene
;
176 use Bio
::Annotation
::Collection
;
177 use Bio
::Annotation
::DBLink
;
178 use Bio
::Annotation
::SimpleValue
;
180 use Bio
::Seq
::SeqFactory
;
182 use base
qw(Bio::Root::Root Bio::Cluster::UniGeneI Bio::IdentifiableI Bio::DescribableI Bio::AnnotatableI Bio::Factory::SequenceStreamI);
185 'Aga' => "Anopheles gambiae",
186 'Ame' => "Apis mellifera",
187 'At' => "Arabidopsis thaliana",
188 'Bmo' => "Bombyx mori",
189 'Bt' => "Bos taurus",
190 'Cel' => "Caenorhabditis elegans",
191 'Cfa' => "Canine familiaris",
192 'Cin' => "Ciona intestinalis",
193 'Cre' => "Chlamydomonas reinhardtii",
194 'Csa' => "Ciona savignyi",
195 'Csi' => "Citrus sinensis",
196 'Ddi' => "Dictyostelium discoideum",
197 'Dr' => "Danio rerio",
198 'Dm' => "Drosophila melanogaster",
199 'Gga' => "Gallus gallus",
200 'Gma' => "Glycine max",
201 'Han' => "Helianthus annus",
202 'Hs' => "Homo sapiens",
203 'Hma' => "Hydra magnipapillata",
204 'Hv' => "Hordeum vulgare",
205 'Lco' => "Lotus corniculatus",
206 'Les' => "Lycopersicon esculentum",
207 'Lsa' => "Lactuca sativa",
208 'Mdo' => "Malus x domestica",
209 'Mgr' => "Magnaporthe grisea",
210 'Mm' => "Mus musculus",
211 'Mtr' => "Medicago truncatula",
212 'Ncr' => "Neurospora crassa",
213 'Oar' => "Ovis aries",
214 'Omy' => "Oncorhynchus mykiss",
215 'Os' => "Oryza sativa",
216 'Ola' => "Oryzias latipes",
217 'Ppa' => "Physcomitrella patens",
218 'Pta' => "Pinus taeda",
219 'Ptp' => "Populus tremula x Populus tremuloides",
220 'Rn' => "Rattus norvegicus",
221 'Sbi' => "Sorghum bicolor",
222 'Sma' => "Schistosoma mansoni",
223 'Sof' => "Saccharum officinarum",
224 'Spu' => "Strongylocentrotus purpuratus",
225 'Ssa' => "Salmo salar",
226 'Ssc' => "Sus scrofa",
227 'Str' => "Xenopus tropicalis",
228 'Stu' => "Solanum tuberosum",
229 'Ta' => "Triticum aestivum",
230 'Tgo' => "Toxoplasma gondii",
231 'Tru' => "Takifugu rubripes",
232 'Vvi' => "Vitis vinifera",
233 'Xl' => "Xenopus laevis",
241 Usage : used by ClusterIO
242 Returns : a new Bio::Cluster::Unigene object
247 # standard new call..
248 my($caller,@args) = @_;
249 my $self = $caller->SUPER::new
(@args);
251 my ($ugid,$desc,$mems,$size,$species,$dispid,$id,$ns,$auth,$v,$seqfact) =
252 $self->_rearrange([qw(UNIGENE_ID
265 $self->{'_alphabet'} = 'dna';
267 $self->unigene_id($ugid) if $ugid;
268 $self->description($desc) if $desc;
269 $self->sequences($mems) if $mems;
270 $self->size($size) if defined($size);
271 $self->display_id($dispid) if $dispid; # overwrites ugid
272 $self->object_id($id) if $id; # overwrites dispid
273 $self->namespace($ns || 'UniGene');
274 $self->authority($auth || 'NCBI');
275 $self->version($v) if defined($v);
276 if( ! defined $seqfact ) {
277 $seqfact = Bio
::Seq
::SeqFactory
->new
278 (-verbose
=> $self->verbose(),
279 -type
=> 'Bio::Seq::RichSeq');
281 $self->sequence_factory($seqfact);
282 if( (! $species) && (defined $self->unigene_id() &&
283 $self->unigene_id() =~ /^([A-Za-z]+)\.[0-9]/)) {
284 # try set a default one depending on the ID
285 $species = $species_map{$1};
287 $self->species($species);
292 =head1 L<Bio::Cluster::UniGeneI> methods
299 Usage : unigene_id();
300 Function: Returns the unigene_id associated with the object.
301 Example : $id = $unigene->unigene_id or $unigene->unigene_id($id)
309 my ($obj,$value) = @_;
310 if( defined $value) {
311 $obj->{'unigene_id'} = $value;
313 return $obj->{'unigene_id'};
322 Function: Returns the title associated with the object.
323 Example : $title = $unigene->title or $unigene->title($title)
325 Args : None or a title
331 my ($obj,$value) = @_;
332 if( defined $value) {
333 $obj->{'title'} = $value;
335 return $obj->{'title'};
343 Function: Returns the gene associated with the object.
344 Example : $gene = $unigene->gene or $unigene->gene($gene)
346 Args : None or a gene
353 return $self->_annotation_value('gene_name', @_);
361 Function: Returns the cytoband associated with the object.
362 Example : $cytoband = $unigene->cytoband or $unigene->cytoband($cytoband)
364 Args : None or a cytoband
371 return $self->_annotation_value('cyto_band', @_);
378 Function: Returns the mgi associated with the object.
379 Example : $mgi = $unigene->mgi or $unigene->mgi($mgi)
392 $self->_remove_dblink('dblink','MGI');
393 # then add if a valid value is present
395 $self->_annotation_dblink('dblink','MGI',$acc);
398 ($acc) = $self->_annotation_dblink('dblink','MGI');
408 Function: Returns or stores a reference to an array containing locuslink data.
409 Returns : An array reference
410 Args : None or an array reference
419 $self->_remove_dblink('dblink','LocusLink');
420 # then add as many accessions as are present
421 foreach my $acc (@
$ll) {
422 $self->_annotation_dblink('dblink','LocusLink',$acc);
425 my @accs = $self->_annotation_dblink('dblink','LocusLink');
436 Function: Returns the homol entry associated with the object.
437 Example : $homol = $unigene->homol or $unigene->homol($homol)
439 Args : None or a homol entry
445 return $self->_annotation_value('homol', @_);
452 Usage : restr_expr();
453 Function: Returns the restr_expr entry associated with the object.
454 Example : $restr_expr = $unigene->restr_expr or $unigene->restr_expr($restr_expr)
456 Args : None or a restr_expr entry
462 return $self->_annotation_value('restr_expr', @_);
469 Usage : gnm_terminus();
470 Function: Returns the gnm_terminus associated with the object.
471 Example : $gnm_terminus = $unigene->gnm_terminus or
472 $unigene->gnm_terminus($gnm_terminus)
474 Args : None or a gnm_terminus
480 return $self->_annotation_value('gnm_terminus', @_);
487 Function: Returns the scount associated with the object.
488 Example : $scount = $unigene->scount or $unigene->scount($scount)
490 Args : None or a scount
495 my ($obj,$value) = @_;
496 if( defined $value) {
497 $obj->{'scount'} = $value;
498 } elsif((! defined($obj->{'scount'})) && defined($obj->sequences())) {
499 $obj->{'scount'} = $obj->size();
501 return $obj->{'scount'};
509 Function: Returns or stores a reference to an array containing
510 tissue expression data
511 Returns : An array reference
512 Args : None or an array reference
519 return $self->_annotation_value_ary('expressed',@_);
526 Usage : chromosome();
527 Function: Returns or stores a reference to an array containing
529 Returns : An array reference
530 Args : None or an array reference
537 return $self->_annotation_value_ary('chromosome',@_);
545 Function: Returns or stores a reference to an array containing sts lines
547 Returns : An array reference
548 Args : None or an array reference
555 return $self->_annotation_value_ary('sts',@_);
563 Function: Returns or stores a reference to an array containing txmap lines
565 Returns : An array reference
566 Args : None or an array reference
573 return $self->_annotation_value_ary('txmap',@_);
581 Function: Returns or stores a reference to an array containing protsim lines
582 This should really only be used by ClusterIO, not directly
583 Returns : An array reference
584 Args : None or an array reference
591 return $self->_annotation_value_ary('protsim',@_);
599 Function: Returns or stores a reference to an array containing
602 This is mostly reserved for ClusterIO parsers. You should
603 use get_members() for get and add_member()/remove_members()
606 Returns : An array reference, or undef
607 Args : None or an array reference or undef
614 return $self->{'members'} = shift if @_;
615 return $self->{'members'};
621 Usage : $obj->species($newval)
622 Function: Get/set the species object for this Unigene cluster.
624 Returns : value of species (a L<Bio::Species> object)
625 Args : on set, new value (a L<Bio::Species> object or
626 the binomial name, or undef, optional)
636 if($species && (! ref($species))) {
637 my @class = reverse(split(' ',$species));
638 $species = Bio
::Species
->new(-classification
=> \
@class);
640 return $self->{'species'} = $species;
642 return $self->{'species'};
646 =head1 L<Bio::ClusterI> methods
654 Function: Get/set the display name or identifier for the cluster
656 This is aliased to unigene_id().
659 Args : optional, on set the display ID ( a string)
664 return shift->unigene_id(@_);
670 Usage : Bio::ClusterI->description("POLYUBIQUITIN")
671 Function: get/set for the consensus description of the cluster
673 This is aliased to title().
675 Returns : the description string
676 Args : Optional the description string
681 return shift->title(@_);
687 Usage : Bio::ClusterI->size();
688 Function: get for the size of the family,
689 calculated from the number of members
691 This is aliased to scount().
693 Returns : the size of the cluster
701 # hard-wiring the size is allowed if there are no sequences
702 return $self->scount(@_) unless defined($self->sequences());
703 # but we can't change the number of members through this method
704 my $n = scalar(@
{$self->sequences()});
705 if(@_ && ($n != $_[0])) {
706 $self->throw("Cannot change cluster size using size() from $n to ".
714 Title : cluster_score
715 Usage : $cluster ->cluster_score(100);
716 Function: get/set for cluster_score which
717 represent the score in which the clustering
718 algorithm assigns to this cluster.
720 For UniGene clusters, there really is no cluster score that
721 would come with the data. However, we provide an
722 implementation here so that you can score UniGene clusters
726 Args : optionally, on set a number
733 return $self->{'cluster_score'} = shift if @_;
734 return $self->{'cluster_score'};
740 Usage : Bio::ClusterI->get_members(($seq1, $seq2));
741 Function: retrieve the members of the family by some criteria
743 Will return all members if no criteria are provided.
745 At this time this implementation does not support
746 specifying criteria and will always return all members.
748 Returns : the array of members
756 my $mems = $self->sequences() || [];
758 if(@
$mems && (ref($mems->[0]) eq "HASH")) {
759 # nope, we need to build the object list from scratch
761 while(my $seq = $self->next_seq()) {
762 push(@memlist, $seq);
764 # we cache this array of objects as the new member list
766 $self->sequences($mems);
773 =head1 Annotatable view at the object properties
780 Usage : $obj->annotation($newval)
781 Function: Get/set the L<Bio::AnnotationCollectionI> object for
782 this UniGene cluster.
784 Many attributes of this class are actually stored within
785 the annotation collection object as L<Bio::AnnotationI>
786 compliant objects, so you can conveniently access them
787 through the same interface as you would e.g. access
788 L<Bio::SeqI> annotation properties.
790 If you call this method in set mode and replace the
791 annotation collection with another one you should know
792 exactly what you are doing.
795 Returns : a L<Bio::AnnotationCollectionI> compliant object
796 Args : on set, new value (a L<Bio::AnnotationCollectionI>
797 compliant object or undef, optional)
806 return $self->{'annotation'} = shift;
807 } elsif(! exists($self->{'annotation'})) {
808 $self->{'annotation'} = Bio
::Annotation
::Collection
->new();
810 return $self->{'annotation'};
814 =head1 Implementation specific methods
816 These are mostly for adding/removing to array properties, and for
817 methods with special functionality.
825 Function: Adds a member object to the list of members.
827 Returns : TRUE if the new member was successfuly added, and FALSE
829 Args : The member to add.
835 my ($self,@mems) = @_;
837 my $memlist = $self->{'members'} || [];
838 # this is an object interface; is the member list already objects?
839 if(@
$memlist && (ref($memlist->[0]) eq "HASH")) {
840 # nope, convert to objects
841 $memlist = [$self->get_members()];
844 push(@
$memlist, @mems);
845 # store if we created this array ref ourselves
846 $self->sequences($memlist);
851 =head2 remove_members
853 Title : remove_members
855 Function: Remove the list of members for this cluster such that the
856 member list is undefined afterwards (as opposed to zero members).
858 Returns : the previous list of members
867 my @mems = $self->get_members();
868 $self->sequences(undef);
873 =head2 next_locuslink
875 Title : next_locuslink
876 Usage : next_locuslink();
877 Function: Returns the next locuslink from an array referred
878 to using $obj->{'locuslink'}
880 If you call this iterator again after it returned undef, it
881 will re-cycle through the list of elements. Changes in the
882 underlying array property while you loop over this iterator
883 will not be reflected until you exhaust the iterator.
885 Example : while ( my $locuslink = $in->next_locuslink() ) {
886 print "$locuslink\n";
896 return $obj->_next_element("ll","locuslink");
902 Usage : next_express();
903 Function: Returns the next tissue from an array referred
904 to using $obj->{'express'}
906 If you call this iterator again after it returned undef, it
907 will re-cycle through the list of elements. Changes in the
908 underlying array property while you loop over this iterator
909 will not be reflected until you exhaust the iterator.
911 Example : while ( my $express = $in->next_express() ) {
922 return $obj->_next_element("express","express");
926 =head2 next_chromosome
928 Title : next_chromosome
929 Usage : next_chromosome();
930 Function: Returns the next chromosome line from an array referred
931 to using $obj->{'chromosome'}
933 If you call this iterator again after it returned undef, it
934 will re-cycle through the list of elements. Changes in the
935 underlying array property while you loop over this iterator
936 will not be reflected until you exhaust the iterator.
938 Example : while ( my $chromosome = $in->next_chromosome() ) {
939 print "$chromosome\n";
946 sub next_chromosome
{
949 return $obj->_next_element("chr","chromosome");
956 Usage : next_protsim();
957 Function: Returns the next protsim line from an array referred
958 to using $obj->{'protsim'}
960 If you call this iterator again after it returned undef, it
961 will re-cycle through the list of elements. Changes in the
962 underlying array property while you loop over this iterator
963 will not be reflected until you exhaust the iterator.
965 Example : while ( my $protsim = $in->next_protsim() ) {
976 return $obj->_next_element("protsim","protsim");
984 Function: Returns the next sts line from an array referred
985 to using $obj->{'sts'}
987 If you call this iterator again after it returned undef, it
988 will re-cycle through the list of elements. Changes in the
989 underlying array property while you loop over this iterator
990 will not be reflected until you exhaust the iterator.
992 Example : while ( my $sts = $in->next_sts() ) {
1003 return $obj->_next_element("sts","sts");
1010 Usage : next_txmap();
1011 Function: Returns the next txmap line from an array
1012 referred to using $obj->{'txmap'}
1014 If you call this iterator again after it returned undef, it
1015 will re-cycle through the list of elements. Changes in the
1016 underlying array property while you loop over this iterator
1017 will not be reflected until you exhaust the iterator.
1019 Example : while ( my $tsmap = $in->next_txmap() ) {
1030 return $obj->_next_element("txmap","txmap");
1033 ###############################
1036 # args: prefix name for the queue
1037 # name of the method from which to re-fill
1038 # returns: the next element from that queue, or undef if the queue is empty
1039 ###############################
1041 my ($self,$queuename,$meth) = @_;
1043 $queuename = "_".$queuename."_queue";
1044 if(! exists($self->{$queuename})) {
1045 # re-initialize from array of sequence data
1046 $self->{$queuename} = [@
{$self->$meth() }];
1048 my $queue = $self->{$queuename};
1049 # is queue exhausted (equivalent to end of stream)?
1051 # yes, remove queue and signal to the caller
1052 delete $self->{$queuename};
1055 return shift(@
$queue);
1058 =head1 L<Bio::IdentifiableI> methods
1065 Usage : $string = $obj->object_id()
1066 Function: a string which represents the stable primary identifier
1067 in this namespace of this object. For DNA sequences this
1068 is its accession_number, similarly for protein sequences
1070 This is aliased to unigene_id().
1078 return shift->unigene_id(@_);
1084 Usage : $version = $obj->version()
1085 Function: a number which differentiates between versions of
1086 the same object. Higher numbers are considered to be
1087 later and more relevant, but a single object described
1088 the same identifier should represent the same concept
1090 Unigene clusters usually won't have a version, so this
1091 will be mostly undefined.
1094 Args : on set, new value (a scalar or undef, optional)
1102 return $self->{'version'} = shift if @_;
1103 return $self->{'version'};
1110 Usage : $authority = $obj->authority()
1111 Function: a string which represents the organisation which
1112 granted the namespace, written as the DNS name for
1113 organisation (eg, wormbase.org)
1116 Args : on set, new value (a scalar or undef, optional)
1124 return $self->{'authority'} = shift if @_;
1125 return $self->{'authority'};
1132 Usage : $string = $obj->namespace()
1133 Function: A string representing the name space this identifier
1134 is valid in, often the database name or the name
1135 describing the collection
1138 Args : on set, new value (a scalar or undef, optional)
1146 return $self->{'namespace'} = shift if @_;
1147 return $self->{'namespace'};
1150 =head1 L<Bio::DescribableI> methods
1156 Title : display_name
1157 Usage : $string = $obj->display_name()
1158 Function: A string which is what should be displayed to the user
1159 the string should have no spaces (ideally, though a cautious
1160 user of this interface would not assumme this) and should be
1161 less than thirty characters (though again, double checking
1162 this is a good idea)
1164 This is aliased to unigene_id().
1172 return shift->unigene_id(@_);
1176 =head2 description()
1179 Usage : $string = $obj->description()
1180 Function: A text string suitable for displaying to the user a
1181 description. This string is likely to have spaces, but
1182 should not have any newlines or formatting - just plain
1183 text. The string should not be greater than 255 characters
1184 and clients can feel justified at truncating strings at 255
1185 characters for the purposes of display
1187 This is already demanded by Bio::ClusterI and hence is
1196 =head1 L<Bio::Factory::SequenceStreamI> methods
1204 Function: Returns the next seq as a Seq object as defined by
1205 $seq->sequence_factory(),
1206 at present an empty Bio::Seq::RichSeq object with
1207 just the accession_number() and pid() set
1209 This iterator will not exhaust the array of member
1210 sequences. If you call next_seq() again after it returned
1211 undef, it will re-cycle through the list of member
1214 Example : while ( my $sequence = $in->next_seq() ) {
1215 print $sequence->accession_number() . "\n";
1217 Returns : Bio::PrimarySeqI object
1225 if(! exists($obj->{'_seq_queue'})) {
1226 # re-initialize from array of sequence data
1227 $obj->{'_seq_queue'} = [@
{$obj->sequences()}];
1229 my $queue = $obj->{'_seq_queue'};
1230 # is queue exhausted (equivalent to end of stream)?
1232 # yes, remove queue and signal to the caller
1233 delete $obj->{'_seq_queue'};
1236 # no, still data in the queue: get the next one from the queue
1237 my $seq_h = shift(@
$queue);
1238 # if this is not a simple hash ref, it's an object already, and we'll
1240 return $seq_h if(ref($seq_h) ne 'HASH');
1241 # nope, we need to assemble this object from scratch
1243 # assemble the annotation collection
1244 my $ac = Bio
::Annotation
::Collection
->new();
1245 foreach my $k (keys %$seq_h) {
1246 next if $k =~ /acc|pid|nid|version/;
1247 my $ann = Bio
::Annotation
::SimpleValue
->new(-tagname
=> $k,
1248 -value
=> $seq_h->{$k});
1249 $ac->add_Annotation($ann);
1251 # assemble the initialization parameters and create object
1252 my $seqobj = $obj->sequence_factory->create(
1253 -accession_number
=> $seq_h->{acc
},
1254 -pid
=> $seq_h->{pid
},
1255 # why does NCBI prepend a 'g' to its own identifiers??
1256 -primary_id
=> $seq_h->{nid
} && $seq_h->{nid
} =~ /^g\d+$/ ?
1257 substr($seq_h->{nid
},1) : $seq_h->{nid
},
1258 -display_id
=> $seq_h->{acc
},
1259 -seq_version
=> $seq_h->{version
},
1260 -alphabet
=> $obj->{'_alphabet'},
1261 -namespace
=> $seq_h->{acc
} =~ /^NM_/ ?
'RefSeq' : 'GenBank',
1262 -authority
=> $obj->authority(), # default is NCBI
1263 -species
=> $obj->species(),
1269 =head2 sequence_factory
1271 Title : sequence_factory
1272 Usage : $seqio->sequence_factory($seqfactory)
1273 Function: Get/Set the Bio::Factory::SequenceFactoryI
1274 Returns : Bio::Factory::SequenceFactoryI
1275 Args : [optional] Bio::Factory::SequenceFactoryI
1280 sub sequence_factory
{
1281 my ($self,$obj) = @_;
1282 if( defined $obj ) {
1283 if( ! ref($obj) || ! $obj->isa('Bio::Factory::SequenceFactoryI') ) {
1284 $self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)." sequence_factory()");
1286 $self->{'_seqfactory'} = $obj;
1288 $self->{'_seqfactory'};
1291 =head1 Private methods
1295 =head2 _annotation_value
1297 Title : _annotation_value
1299 Function: Private method.
1301 Returns : the value (a string)
1302 Args : annotation key (a string)
1303 on set, annotation value (a string)
1308 sub _annotation_value
{
1315 if(! defined($val)) {
1316 ($ann) = $self->annotation->remove_Annotations($key);
1317 return $ann ?
$ann->value() : undef;
1320 ($ann) = $self->annotation->get_Annotations($key);
1321 if(defined $ann && (! $val)) {
1322 # get mode and exists
1323 $val = $ann->value();
1327 $ann = Bio
::Annotation
::SimpleValue
->new(-tagname
=> $key);
1328 $self->annotation->add_Annotation($ann);
1336 =head2 _annotation_value_ary
1338 Title : _annotation_value_ary
1340 Function: Private method.
1342 Returns : reference to the array of values
1343 Args : annotation key (a string)
1344 on set, reference to an array holding the values
1349 sub _annotation_value_ary
{
1350 my ($self,$key,$arr) = @_;
1352 my $ac = $self->annotation;
1355 $ac->remove_Annotations($key);
1356 # then add as many values as are present
1357 foreach my $val (@
$arr) {
1358 my $ann = Bio
::Annotation
::SimpleValue
->new(-value
=> $val,
1361 $ac->add_Annotation($ann);
1364 my @vals = map { $_->value(); } $ac->get_Annotations($key);
1371 =head2 _annotation_dblink
1373 Title : _annotation_dblink
1375 Function: Private method.
1377 Returns : array of accessions for the given database (namespace)
1378 Args : annotation key (a string)
1379 dbname (a string) (optional on get, mandatory on set)
1380 on set, accession or ID (a string), and version
1385 sub _annotation_dblink
{
1386 my ($self,$key,$dbname,$acc,$version) = @_;
1389 # set mode -- this is adding here
1390 my $ann = Bio
::Annotation
::DBLink
->new(-tagname
=> $key,
1391 -primary_id
=> $acc,
1392 -database
=> $dbname,
1393 -version
=> $version);
1394 $self->annotation->add_Annotation($ann);
1398 my @anns = $self->annotation->get_Annotations($key);
1399 # filter out those that don't match the requested database
1401 @anns = grep { $_->database() eq $dbname; } @anns;
1403 return map { $_->primary_id(); } @anns;
1407 =head2 _remove_dblink
1409 Title : _remove_dblink
1411 Function: Private method.
1413 Returns : array of accessions for the given database (namespace)
1414 Args : annotation key (a string)
1415 dbname (a string) (optional)
1421 my ($self,$key,$dbname) = @_;
1423 my $ac = $self->annotation();
1426 foreach my $ann ($ac->remove_Annotations($key)) {
1427 if($ann->database() eq $dbname) {
1430 $ac->add_Annotation($ann);
1434 @anns = $ac->remove_Annotations($key);
1436 return map { $_->primary_id(); } @anns;
1440 #####################################################################
1441 # aliases for naming consistency or other reasons #
1442 #####################################################################
1444 *sequence
= \
&sequences
;