3 # BioPerl module for Bio::Cluster::UniGene.pm
5 # Please direct questions and support issues to <bioperl-l@bioperl.org>
7 # Cared for by Andrew Macgregor <andrew at cbbc.murdoch.edu.au>
9 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
10 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
11 # http://meg.otago.ac.nz/
13 # You may distribute this module under the same terms as perl itself
16 # April 17, 2002 - Initial implementation by Andrew Macgregor
17 # POD documentation - main docs before the code
21 Bio::Cluster::UniGene - UniGene object
25 use Bio::Cluster::UniGene;
28 $stream = Bio::ClusterIO->new('-file' => "Hs.data",
29 '-format' => "unigene");
30 # note: we quote -format to keep older perl's from complaining.
32 while ( my $in = $stream->next_cluster() ) {
33 print $in->unigene_id() . "\n";
34 while ( my $sequence = $in->next_seq() ) {
35 print $sequence->accession_number() . "\n";
41 This UniGene object implements the L<Bio::Cluster::UniGeneI> interface
42 for the representation if UniGene clusters in Bioperl. It is returned
43 by the L<Bio::ClusterIO> parser for unigene format and contains all
44 the data associated with one UniGene record.
46 This class implements several interfaces and hence can be used
47 wherever instances of such interfaces are expected. In particular, the
48 interfaces are L<Bio::ClusterI> as the base interface for all cluster
49 representations, and in addition L<Bio::IdentifiableI> and
52 The following lists the UniGene specific methods that are available
53 (see below for details). Be aware next_XXX iterators take a snapshot
54 of the array property when they are first called, and this snapshot is
55 not reset until the iterator is exhausted. Hence, once called you need
56 to exhaust the iterator to see any changes that have been made to the
57 property in the meantime. You will usually want to use the
58 non-iterator equivalents and loop over the elements yourself.
60 new() - standard new call
62 unigene_id() - set/get unigene_id
64 title() - set/get title (description)
68 cytoband() - set/get cytoband
72 locuslink() - set/get locuslink
74 homol() - set/get homologene
76 gnm_terminus() - set/get gnm_terminus
78 scount() - set/get scount
80 express() - set/get express, currently takes/returns a reference to an
81 array of expressed tissues
83 next_express() - returns the next tissue expression from the expressed
86 chromosome() - set/get chromosome, currently takes/returns a reference
87 to an array of chromosome lines
89 next_chromosome() - returns the next chromosome line from the array of
92 sts() - set/get sts, currently takes/returns a reference to an array
95 next_sts() - returns the next sts line from the array of sts lines
97 txmap() - set/get txmap, currently takes/returns a reference to an
100 next_txmap() - returns the next txmap line from the array of txmap
103 protsim() - set/get protsim, currently takes/returns a reference to an
104 array of protsim lines
106 next_protsim() - returns the next protsim line from the array of
109 sequences() - set/get sequence, currently takes/returns a reference to
110 an array of references to seq info
112 next_seq() - returns a Seq object that currently only contains an
116 =head1 Implemented Interfaces
118 This class implementes the following interfaces.
122 =item Bio::Cluster::UniGeneI
124 This includes implementing Bio::ClusterI.
126 =item Bio::IdentifiableI
128 =item Bio::DescribableI
130 =item Bio::AnnotatableI
132 =item Bio::Factory::SequenceStreamI
141 User feedback is an integral part of the evolution of this and other
142 Bioperl modules. Send your comments and suggestions preferably to one
143 of the Bioperl mailing lists. Your participation is much appreciated.
145 bioperl-l@bioperl.org - General discussion
146 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
150 Please direct usage questions or support issues to the mailing list:
152 I<bioperl-l@bioperl.org>
154 rather than to the module maintainer directly. Many experienced and
155 reponsive experts will be able look at the problem and quickly
156 address it. Please include a thorough description of the problem
157 with code and data examples if at all possible.
159 =head2 Reporting Bugs
161 Report bugs to the Bioperl bug tracking system to help us keep track
162 the bugs and their resolution. Bug reports can be submitted via the
165 http://bugzilla.open-bio.org/
167 =head1 AUTHOR - Andrew Macgregor
169 Email andrew at cbbc.murdoch.edu.au
173 Hilmar Lapp, hlapp at gmx.net
178 The rest of the documentation details each of the object
179 methods. Internal methods are usually preceded with a "_".
183 # Let the code begin...
186 package Bio
::Cluster
::UniGene
;
189 use Bio
::Annotation
::Collection
;
190 use Bio
::Annotation
::DBLink
;
191 use Bio
::Annotation
::SimpleValue
;
193 use Bio
::Seq
::SeqFactory
;
195 use base
qw(Bio::Root::Root Bio::Cluster::UniGeneI Bio::IdentifiableI Bio::DescribableI Bio::AnnotatableI Bio::Factory::SequenceStreamI);
198 'Aga' => "Anopheles gambiae",
199 'Ame' => "Apis mellifera",
200 'At' => "Arabidopsis thaliana",
201 'Bmo' => "Bombyx mori",
202 'Bt' => "Bos taurus",
203 'Cel' => "Caenorhabditis elegans",
204 'Cfa' => "Canine familiaris",
205 'Cin' => "Ciona intestinalis",
206 'Cre' => "Chlamydomonas reinhardtii",
207 'Csa' => "Ciona savignyi",
208 'Csi' => "Citrus sinensis",
209 'Ddi' => "Dictyostelium discoideum",
210 'Dr' => "Danio rerio",
211 'Dm' => "Drosophila melanogaster",
212 'Gga' => "Gallus gallus",
213 'Gma' => "Glycine max",
214 'Han' => "Helianthus annus",
215 'Hs' => "Homo sapiens",
216 'Hma' => "Hydra magnipapillata",
217 'Hv' => "Hordeum vulgare",
218 'Lco' => "Lotus corniculatus",
219 'Les' => "Lycopersicon esculentum",
220 'Lsa' => "Lactuca sativa",
221 'Mdo' => "Malus x domestica",
222 'Mgr' => "Magnaporthe grisea",
223 'Mm' => "Mus musculus",
224 'Mtr' => "Medicago truncatula",
225 'Ncr' => "Neurospora crassa",
226 'Oar' => "Ovis aries",
227 'Omy' => "Oncorhynchus mykiss",
228 'Os' => "Oryza sativa",
229 'Ola' => "Oryzias latipes",
230 'Ppa' => "Physcomitrella patens",
231 'Pta' => "Pinus taeda",
232 'Ptp' => "Populus tremula x Populus tremuloides",
233 'Rn' => "Rattus norvegicus",
234 'Sbi' => "Sorghum bicolor",
235 'Sma' => "Schistosoma mansoni",
236 'Sof' => "Saccharum officinarum",
237 'Spu' => "Strongylocentrotus purpuratus",
238 'Ssa' => "Salmo salar",
239 'Ssc' => "Sus scrofa",
240 'Str' => "Xenopus tropicalis",
241 'Stu' => "Solanum tuberosum",
242 'Ta' => "Triticum aestivum",
243 'Tgo' => "Toxoplasma gondii",
244 'Tru' => "Takifugu rubripes",
245 'Vvi' => "Vitis vinifera",
246 'Xl' => "Xenopus laevis",
254 Usage : used by ClusterIO
255 Returns : a new Bio::Cluster::Unigene object
260 # standard new call..
261 my($caller,@args) = @_;
262 my $self = $caller->SUPER::new
(@args);
264 my ($ugid,$desc,$mems,$size,$species,$dispid,$id,$ns,$auth,$v,$seqfact) =
265 $self->_rearrange([qw(UNIGENE_ID
278 $self->{'_alphabet'} = 'dna';
280 $self->unigene_id($ugid) if $ugid;
281 $self->description($desc) if $desc;
282 $self->sequences($mems) if $mems;
283 $self->size($size) if defined($size);
284 $self->display_id($dispid) if $dispid; # overwrites ugid
285 $self->object_id($id) if $id; # overwrites dispid
286 $self->namespace($ns || 'UniGene');
287 $self->authority($auth || 'NCBI');
288 $self->version($v) if defined($v);
289 if( ! defined $seqfact ) {
290 $seqfact = Bio
::Seq
::SeqFactory
->new
291 (-verbose
=> $self->verbose(),
292 -type
=> 'Bio::Seq::RichSeq');
294 $self->sequence_factory($seqfact);
295 if( (! $species) && (defined $self->unigene_id() &&
296 $self->unigene_id() =~ /^([A-Za-z]+)\.[0-9]/)) {
297 # try set a default one depending on the ID
298 $species = $species_map{$1};
300 $self->species($species);
305 =head1 L<Bio::Cluster::UniGeneI> methods
312 Usage : unigene_id();
313 Function: Returns the unigene_id associated with the object.
314 Example : $id = $unigene->unigene_id or $unigene->unigene_id($id)
322 my ($obj,$value) = @_;
323 if( defined $value) {
324 $obj->{'unigene_id'} = $value;
326 return $obj->{'unigene_id'};
335 Function: Returns the title associated with the object.
336 Example : $title = $unigene->title or $unigene->title($title)
338 Args : None or a title
344 my ($obj,$value) = @_;
345 if( defined $value) {
346 $obj->{'title'} = $value;
348 return $obj->{'title'};
356 Function: Returns the gene associated with the object.
357 Example : $gene = $unigene->gene or $unigene->gene($gene)
359 Args : None or a gene
366 return $self->_annotation_value('gene_name', @_);
374 Function: Returns the cytoband associated with the object.
375 Example : $cytoband = $unigene->cytoband or $unigene->cytoband($cytoband)
377 Args : None or a cytoband
384 return $self->_annotation_value('cyto_band', @_);
391 Function: Returns the mgi associated with the object.
392 Example : $mgi = $unigene->mgi or $unigene->mgi($mgi)
405 $self->_remove_dblink('dblink','MGI');
406 # then add if a valid value is present
408 $self->_annotation_dblink('dblink','MGI',$acc);
411 ($acc) = $self->_annotation_dblink('dblink','MGI');
421 Function: Returns or stores a reference to an array containing locuslink data.
422 Returns : An array reference
423 Args : None or an array reference
432 $self->_remove_dblink('dblink','LocusLink');
433 # then add as many accessions as are present
434 foreach my $acc (@
$ll) {
435 $self->_annotation_dblink('dblink','LocusLink',$acc);
438 my @accs = $self->_annotation_dblink('dblink','LocusLink');
449 Function: Returns the homol entry associated with the object.
450 Example : $homol = $unigene->homol or $unigene->homol($homol)
452 Args : None or a homol entry
458 return $self->_annotation_value('homol', @_);
465 Usage : restr_expr();
466 Function: Returns the restr_expr entry associated with the object.
467 Example : $restr_expr = $unigene->restr_expr or $unigene->restr_expr($restr_expr)
469 Args : None or a restr_expr entry
475 return $self->_annotation_value('restr_expr', @_);
482 Usage : gnm_terminus();
483 Function: Returns the gnm_terminus associated with the object.
484 Example : $gnm_terminus = $unigene->gnm_terminus or
485 $unigene->gnm_terminus($gnm_terminus)
487 Args : None or a gnm_terminus
493 return $self->_annotation_value('gnm_terminus', @_);
500 Function: Returns the scount associated with the object.
501 Example : $scount = $unigene->scount or $unigene->scount($scount)
503 Args : None or a scount
508 my ($obj,$value) = @_;
509 if( defined $value) {
510 $obj->{'scount'} = $value;
511 } elsif((! defined($obj->{'scount'})) && defined($obj->sequences())) {
512 $obj->{'scount'} = $obj->size();
514 return $obj->{'scount'};
522 Function: Returns or stores a reference to an array containing
523 tissue expression data
524 Returns : An array reference
525 Args : None or an array reference
532 return $self->_annotation_value_ary('expressed',@_);
539 Usage : chromosome();
540 Function: Returns or stores a reference to an array containing
542 Returns : An array reference
543 Args : None or an array reference
550 return $self->_annotation_value_ary('chromosome',@_);
558 Function: Returns or stores a reference to an array containing sts lines
560 Returns : An array reference
561 Args : None or an array reference
568 return $self->_annotation_value_ary('sts',@_);
576 Function: Returns or stores a reference to an array containing txmap lines
578 Returns : An array reference
579 Args : None or an array reference
586 return $self->_annotation_value_ary('txmap',@_);
594 Function: Returns or stores a reference to an array containing protsim lines
595 This should really only be used by ClusterIO, not directly
596 Returns : An array reference
597 Args : None or an array reference
604 return $self->_annotation_value_ary('protsim',@_);
612 Function: Returns or stores a reference to an array containing
615 This is mostly reserved for ClusterIO parsers. You should
616 use get_members() for get and add_member()/remove_members()
619 Returns : An array reference, or undef
620 Args : None or an array reference or undef
627 return $self->{'members'} = shift if @_;
628 return $self->{'members'};
634 Usage : $obj->species($newval)
635 Function: Get/set the species object for this Unigene cluster.
637 Returns : value of species (a L<Bio::Species> object)
638 Args : on set, new value (a L<Bio::Species> object or
639 the binomial name, or undef, optional)
649 if($species && (! ref($species))) {
650 my @class = reverse(split(' ',$species));
651 $species = Bio
::Species
->new(-classification
=> \
@class);
653 return $self->{'species'} = $species;
655 return $self->{'species'};
659 =head1 L<Bio::ClusterI> methods
667 Function: Get/set the display name or identifier for the cluster
669 This is aliased to unigene_id().
672 Args : optional, on set the display ID ( a string)
677 return shift->unigene_id(@_);
683 Usage : Bio::ClusterI->description("POLYUBIQUITIN")
684 Function: get/set for the consensus description of the cluster
686 This is aliased to title().
688 Returns : the description string
689 Args : Optional the description string
694 return shift->title(@_);
700 Usage : Bio::ClusterI->size();
701 Function: get for the size of the family,
702 calculated from the number of members
704 This is aliased to scount().
706 Returns : the size of the cluster
714 # hard-wiring the size is allowed if there are no sequences
715 return $self->scount(@_) unless defined($self->sequences());
716 # but we can't change the number of members through this method
717 my $n = scalar(@
{$self->sequences()});
718 if(@_ && ($n != $_[0])) {
719 $self->throw("Cannot change cluster size using size() from $n to ".
727 Title : cluster_score
728 Usage : $cluster ->cluster_score(100);
729 Function: get/set for cluster_score which
730 represent the score in which the clustering
731 algorithm assigns to this cluster.
733 For UniGene clusters, there really is no cluster score that
734 would come with the data. However, we provide an
735 implementation here so that you can score UniGene clusters
739 Args : optionally, on set a number
746 return $self->{'cluster_score'} = shift if @_;
747 return $self->{'cluster_score'};
753 Usage : Bio::ClusterI->get_members(($seq1, $seq2));
754 Function: retrieve the members of the family by some criteria
756 Will return all members if no criteria are provided.
758 At this time this implementation does not support
759 specifying criteria and will always return all members.
761 Returns : the array of members
769 my $mems = $self->sequences() || [];
771 if(@
$mems && (ref($mems->[0]) eq "HASH")) {
772 # nope, we need to build the object list from scratch
774 while(my $seq = $self->next_seq()) {
775 push(@memlist, $seq);
777 # we cache this array of objects as the new member list
779 $self->sequences($mems);
786 =head1 Annotatable view at the object properties
793 Usage : $obj->annotation($newval)
794 Function: Get/set the L<Bio::AnnotationCollectionI> object for
795 this UniGene cluster.
797 Many attributes of this class are actually stored within
798 the annotation collection object as L<Bio::AnnotationI>
799 compliant objects, so you can conveniently access them
800 through the same interface as you would e.g. access
801 L<Bio::SeqI> annotation properties.
803 If you call this method in set mode and replace the
804 annotation collection with another one you should know
805 exactly what you are doing.
808 Returns : a L<Bio::AnnotationCollectionI> compliant object
809 Args : on set, new value (a L<Bio::AnnotationCollectionI>
810 compliant object or undef, optional)
819 return $self->{'annotation'} = shift;
820 } elsif(! exists($self->{'annotation'})) {
821 $self->{'annotation'} = Bio
::Annotation
::Collection
->new();
823 return $self->{'annotation'};
827 =head1 Implementation specific methods
829 These are mostly for adding/removing to array properties, and for
830 methods with special functionality.
838 Function: Adds a member object to the list of members.
840 Returns : TRUE if the new member was successfuly added, and FALSE
842 Args : The member to add.
848 my ($self,@mems) = @_;
850 my $memlist = $self->{'members'} || [];
851 # this is an object interface; is the member list already objects?
852 if(@
$memlist && (ref($memlist->[0]) eq "HASH")) {
853 # nope, convert to objects
854 $memlist = [$self->get_members()];
857 push(@
$memlist, @mems);
858 # store if we created this array ref ourselves
859 $self->sequences($memlist);
864 =head2 remove_members
866 Title : remove_members
868 Function: Remove the list of members for this cluster such that the
869 member list is undefined afterwards (as opposed to zero members).
871 Returns : the previous list of members
880 my @mems = $self->get_members();
881 $self->sequences(undef);
886 =head2 next_locuslink
888 Title : next_locuslink
889 Usage : next_locuslink();
890 Function: Returns the next locuslink from an array referred
891 to using $obj->{'locuslink'}
893 If you call this iterator again after it returned undef, it
894 will re-cycle through the list of elements. Changes in the
895 underlying array property while you loop over this iterator
896 will not be reflected until you exhaust the iterator.
898 Example : while ( my $locuslink = $in->next_locuslink() ) {
899 print "$locuslink\n";
909 return $obj->_next_element("ll","locuslink");
915 Usage : next_express();
916 Function: Returns the next tissue from an array referred
917 to using $obj->{'express'}
919 If you call this iterator again after it returned undef, it
920 will re-cycle through the list of elements. Changes in the
921 underlying array property while you loop over this iterator
922 will not be reflected until you exhaust the iterator.
924 Example : while ( my $express = $in->next_express() ) {
935 return $obj->_next_element("express","express");
939 =head2 next_chromosome
941 Title : next_chromosome
942 Usage : next_chromosome();
943 Function: Returns the next chromosome line from an array referred
944 to using $obj->{'chromosome'}
946 If you call this iterator again after it returned undef, it
947 will re-cycle through the list of elements. Changes in the
948 underlying array property while you loop over this iterator
949 will not be reflected until you exhaust the iterator.
951 Example : while ( my $chromosome = $in->next_chromosome() ) {
952 print "$chromosome\n";
959 sub next_chromosome
{
962 return $obj->_next_element("chr","chromosome");
969 Usage : next_protsim();
970 Function: Returns the next protsim line from an array referred
971 to using $obj->{'protsim'}
973 If you call this iterator again after it returned undef, it
974 will re-cycle through the list of elements. Changes in the
975 underlying array property while you loop over this iterator
976 will not be reflected until you exhaust the iterator.
978 Example : while ( my $protsim = $in->next_protsim() ) {
989 return $obj->_next_element("protsim","protsim");
997 Function: Returns the next sts line from an array referred
998 to using $obj->{'sts'}
1000 If you call this iterator again after it returned undef, it
1001 will re-cycle through the list of elements. Changes in the
1002 underlying array property while you loop over this iterator
1003 will not be reflected until you exhaust the iterator.
1005 Example : while ( my $sts = $in->next_sts() ) {
1016 return $obj->_next_element("sts","sts");
1023 Usage : next_txmap();
1024 Function: Returns the next txmap line from an array
1025 referred to using $obj->{'txmap'}
1027 If you call this iterator again after it returned undef, it
1028 will re-cycle through the list of elements. Changes in the
1029 underlying array property while you loop over this iterator
1030 will not be reflected until you exhaust the iterator.
1032 Example : while ( my $tsmap = $in->next_txmap() ) {
1043 return $obj->_next_element("txmap","txmap");
1046 ###############################
1049 # args: prefix name for the queue
1050 # name of the method from which to re-fill
1051 # returns: the next element from that queue, or undef if the queue is empty
1052 ###############################
1054 my ($self,$queuename,$meth) = @_;
1056 $queuename = "_".$queuename."_queue";
1057 if(! exists($self->{$queuename})) {
1058 # re-initialize from array of sequence data
1059 $self->{$queuename} = [@
{$self->$meth() }];
1061 my $queue = $self->{$queuename};
1062 # is queue exhausted (equivalent to end of stream)?
1064 # yes, remove queue and signal to the caller
1065 delete $self->{$queuename};
1068 return shift(@
$queue);
1071 =head1 L<Bio::IdentifiableI> methods
1078 Usage : $string = $obj->object_id()
1079 Function: a string which represents the stable primary identifier
1080 in this namespace of this object. For DNA sequences this
1081 is its accession_number, similarly for protein sequences
1083 This is aliased to unigene_id().
1091 return shift->unigene_id(@_);
1097 Usage : $version = $obj->version()
1098 Function: a number which differentiates between versions of
1099 the same object. Higher numbers are considered to be
1100 later and more relevant, but a single object described
1101 the same identifier should represent the same concept
1103 Unigene clusters usually won't have a version, so this
1104 will be mostly undefined.
1107 Args : on set, new value (a scalar or undef, optional)
1115 return $self->{'version'} = shift if @_;
1116 return $self->{'version'};
1123 Usage : $authority = $obj->authority()
1124 Function: a string which represents the organisation which
1125 granted the namespace, written as the DNS name for
1126 organisation (eg, wormbase.org)
1129 Args : on set, new value (a scalar or undef, optional)
1137 return $self->{'authority'} = shift if @_;
1138 return $self->{'authority'};
1145 Usage : $string = $obj->namespace()
1146 Function: A string representing the name space this identifier
1147 is valid in, often the database name or the name
1148 describing the collection
1151 Args : on set, new value (a scalar or undef, optional)
1159 return $self->{'namespace'} = shift if @_;
1160 return $self->{'namespace'};
1163 =head1 L<Bio::DescribableI> methods
1169 Title : display_name
1170 Usage : $string = $obj->display_name()
1171 Function: A string which is what should be displayed to the user
1172 the string should have no spaces (ideally, though a cautious
1173 user of this interface would not assumme this) and should be
1174 less than thirty characters (though again, double checking
1175 this is a good idea)
1177 This is aliased to unigene_id().
1185 return shift->unigene_id(@_);
1189 =head2 description()
1192 Usage : $string = $obj->description()
1193 Function: A text string suitable for displaying to the user a
1194 description. This string is likely to have spaces, but
1195 should not have any newlines or formatting - just plain
1196 text. The string should not be greater than 255 characters
1197 and clients can feel justified at truncating strings at 255
1198 characters for the purposes of display
1200 This is already demanded by Bio::ClusterI and hence is
1209 =head1 L<Bio::Factory::SequenceStreamI> methods
1217 Function: Returns the next seq as a Seq object as defined by
1218 $seq->sequence_factory(),
1219 at present an empty Bio::Seq::RichSeq object with
1220 just the accession_number() and pid() set
1222 This iterator will not exhaust the array of member
1223 sequences. If you call next_seq() again after it returned
1224 undef, it will re-cycle through the list of member
1227 Example : while ( my $sequence = $in->next_seq() ) {
1228 print $sequence->accession_number() . "\n";
1230 Returns : Bio::PrimarySeqI object
1238 if(! exists($obj->{'_seq_queue'})) {
1239 # re-initialize from array of sequence data
1240 $obj->{'_seq_queue'} = [@
{$obj->sequences()}];
1242 my $queue = $obj->{'_seq_queue'};
1243 # is queue exhausted (equivalent to end of stream)?
1245 # yes, remove queue and signal to the caller
1246 delete $obj->{'_seq_queue'};
1249 # no, still data in the queue: get the next one from the queue
1250 my $seq_h = shift(@
$queue);
1251 # if this is not a simple hash ref, it's an object already, and we'll
1253 return $seq_h if(ref($seq_h) ne 'HASH');
1254 # nope, we need to assemble this object from scratch
1256 # assemble the annotation collection
1257 my $ac = Bio
::Annotation
::Collection
->new();
1258 foreach my $k (keys %$seq_h) {
1259 next if $k =~ /acc|pid|nid|version/;
1260 my $ann = Bio
::Annotation
::SimpleValue
->new(-tagname
=> $k,
1261 -value
=> $seq_h->{$k});
1262 $ac->add_Annotation($ann);
1264 # assemble the initialization parameters and create object
1265 my $seqobj = $obj->sequence_factory->create(
1266 -accession_number
=> $seq_h->{acc
},
1267 -pid
=> $seq_h->{pid
},
1268 # why does NCBI prepend a 'g' to its own identifiers??
1269 -primary_id
=> $seq_h->{nid
} && $seq_h->{nid
} =~ /^g\d+$/ ?
1270 substr($seq_h->{nid
},1) : $seq_h->{nid
},
1271 -display_id
=> $seq_h->{acc
},
1272 -seq_version
=> $seq_h->{version
},
1273 -alphabet
=> $obj->{'_alphabet'},
1274 -namespace
=> $seq_h->{acc
} =~ /^NM_/ ?
'RefSeq' : 'GenBank',
1275 -authority
=> $obj->authority(), # default is NCBI
1276 -species
=> $obj->species(),
1282 =head2 sequence_factory
1284 Title : sequence_factory
1285 Usage : $seqio->sequence_factory($seqfactory)
1286 Function: Get/Set the Bio::Factory::SequenceFactoryI
1287 Returns : Bio::Factory::SequenceFactoryI
1288 Args : [optional] Bio::Factory::SequenceFactoryI
1293 sub sequence_factory
{
1294 my ($self,$obj) = @_;
1295 if( defined $obj ) {
1296 if( ! ref($obj) || ! $obj->isa('Bio::Factory::SequenceFactoryI') ) {
1297 $self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)." sequence_factory()");
1299 $self->{'_seqfactory'} = $obj;
1301 $self->{'_seqfactory'};
1304 =head1 Private methods
1308 =head2 _annotation_value
1310 Title : _annotation_value
1312 Function: Private method.
1314 Returns : the value (a string)
1315 Args : annotation key (a string)
1316 on set, annotation value (a string)
1321 sub _annotation_value
{
1328 if(! defined($val)) {
1329 ($ann) = $self->annotation->remove_Annotations($key);
1330 return $ann ?
$ann->value() : undef;
1333 ($ann) = $self->annotation->get_Annotations($key);
1334 if(defined $ann && (! $val)) {
1335 # get mode and exists
1336 $val = $ann->value();
1340 $ann = Bio
::Annotation
::SimpleValue
->new(-tagname
=> $key);
1341 $self->annotation->add_Annotation($ann);
1349 =head2 _annotation_value_ary
1351 Title : _annotation_value_ary
1353 Function: Private method.
1355 Returns : reference to the array of values
1356 Args : annotation key (a string)
1357 on set, reference to an array holding the values
1362 sub _annotation_value_ary
{
1363 my ($self,$key,$arr) = @_;
1365 my $ac = $self->annotation;
1368 $ac->remove_Annotations($key);
1369 # then add as many values as are present
1370 foreach my $val (@
$arr) {
1371 my $ann = Bio
::Annotation
::SimpleValue
->new(-value
=> $val,
1374 $ac->add_Annotation($ann);
1377 my @vals = map { $_->value(); } $ac->get_Annotations($key);
1384 =head2 _annotation_dblink
1386 Title : _annotation_dblink
1388 Function: Private method.
1390 Returns : array of accessions for the given database (namespace)
1391 Args : annotation key (a string)
1392 dbname (a string) (optional on get, mandatory on set)
1393 on set, accession or ID (a string), and version
1398 sub _annotation_dblink
{
1399 my ($self,$key,$dbname,$acc,$version) = @_;
1402 # set mode -- this is adding here
1403 my $ann = Bio
::Annotation
::DBLink
->new(-tagname
=> $key,
1404 -primary_id
=> $acc,
1405 -database
=> $dbname,
1406 -version
=> $version);
1407 $self->annotation->add_Annotation($ann);
1411 my @anns = $self->annotation->get_Annotations($key);
1412 # filter out those that don't match the requested database
1414 @anns = grep { $_->database() eq $dbname; } @anns;
1416 return map { $_->primary_id(); } @anns;
1420 =head2 _remove_dblink
1422 Title : _remove_dblink
1424 Function: Private method.
1426 Returns : array of accessions for the given database (namespace)
1427 Args : annotation key (a string)
1428 dbname (a string) (optional)
1434 my ($self,$key,$dbname) = @_;
1436 my $ac = $self->annotation();
1439 foreach my $ann ($ac->remove_Annotations($key)) {
1440 if($ann->database() eq $dbname) {
1443 $ac->add_Annotation($ann);
1447 @anns = $ac->remove_Annotations($key);
1449 return map { $_->primary_id(); } @anns;
1453 #####################################################################
1454 # aliases for naming consistency or other reasons #
1455 #####################################################################
1457 *sequence
= \
&sequences
;