Couple more minor edits ...
[bioperl-live.git] / Bio / SeqIO / embl.pm
blob1b438949ae9764b0b4a607ebf23d62d587e3f07e
2 # BioPerl module for Bio::SeqIO::EMBL
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Ewan Birney <birney@ebi.ac.uk>
8 # Copyright Ewan Birney
10 # You may distribute this module under the same terms as perl itself
12 # POD documentation - main docs before the code
14 =head1 NAME
16 Bio::SeqIO::embl - EMBL sequence input/output stream
18 =head1 SYNOPSIS
20 It is probably best not to use this object directly, but
21 rather go through the SeqIO handler system. Go:
23 $stream = Bio::SeqIO->new(-file => $filename, -format => 'EMBL');
25 while ( (my $seq = $stream->next_seq()) ) {
26 # do something with $seq
29 =head1 DESCRIPTION
31 This object can transform Bio::Seq objects to and from EMBL flat
32 file databases.
34 There is a lot of flexibility here about how to dump things which
35 should be documented more fully.
37 There should be a common object that this and Genbank share (probably
38 with Swissprot). Too much of the magic is identical.
40 =head2 Optional functions
42 =over 3
44 =item _show_dna()
46 (output only) shows the dna or not
48 =item _post_sort()
50 (output only) provides a sorting func which is applied to the FTHelpers
51 before printing
53 =item _id_generation_func()
55 This is function which is called as
57 print "ID ", $func($annseq), "\n";
59 To generate the ID line. If it is not there, it generates a sensible ID
60 line using a number of tools.
62 If you want to output annotations in EMBL format they need to be
63 stored in a Bio::Annotation::Collection object which is accessible
64 through the Bio::SeqI interface method L<annotation()|annotation>.
66 The following are the names of the keys which are polled from a
67 L<Bio::Annotation::Collection> object.
69 reference - Should contain Bio::Annotation::Reference objects
70 comment - Should contain Bio::Annotation::Comment objects
71 dblink - Should contain Bio::Annotation::DBLink objects
73 =back
75 =head1 FEEDBACK
77 =head2 Mailing Lists
79 User feedback is an integral part of the evolution of this and other
80 Bioperl modules. Send your comments and suggestions preferably to one
81 of the Bioperl mailing lists. Your participation is much appreciated.
83 bioperl-l@bioperl.org - General discussion
84 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
86 =head2 Support
88 Please direct usage questions or support issues to the mailing list:
90 I<bioperl-l@bioperl.org>
92 rather than to the module maintainer directly. Many experienced and
93 reponsive experts will be able look at the problem and quickly
94 address it. Please include a thorough description of the problem
95 with code and data examples if at all possible.
97 =head2 Reporting Bugs
99 Report bugs to the Bioperl bug tracking system to help us keep track
100 the bugs and their resolution. Bug reports can be submitted via
101 the web:
103 https://github.com/bioperl/bioperl-live/issues
105 =head1 AUTHOR - Ewan Birney
107 Email birney@ebi.ac.uk
109 =head1 APPENDIX
111 The rest of the documentation details each of the object
112 methods. Internal methods are usually preceded with a _
114 =cut
117 # Let the code begin...
120 package Bio::SeqIO::embl;
121 use vars qw(%FTQUAL_NO_QUOTE);
122 use strict;
123 use Bio::SeqIO::FTHelper;
124 use Bio::SeqFeature::Generic;
125 use Bio::Species;
126 use Bio::Seq::SeqFactory;
127 use Bio::Annotation::Collection;
128 use Bio::Annotation::Comment;
129 use Bio::Annotation::Reference;
130 use Bio::Annotation::DBLink;
132 use base qw(Bio::SeqIO);
134 # Note that a qualifier that exceeds one line (i.e. a long label) will
135 # automatically be quoted regardless:
136 %FTQUAL_NO_QUOTE=(
137 'anticodon'=>1,
138 'citation'=>1,
139 'codon'=>1,
140 'codon_start'=>1,
141 'cons_splice'=>1,
142 'direction'=>1,
143 'evidence'=>1,
144 'label'=>1,
145 'mod_base'=> 1,
146 'number'=> 1,
147 'rpt_type'=> 1,
148 'rpt_unit'=> 1,
149 'transl_except'=> 1,
150 'transl_table'=> 1,
151 'usedin'=> 1,
154 sub _initialize {
155 my($self,@args) = @_;
157 $self->SUPER::_initialize(@args);
158 # hash for functions for decoding keys.
159 $self->{'_func_ftunit_hash'} = {};
160 # sets this to one by default. People can change it
161 $self->_show_dna(1);
162 if ( ! defined $self->sequence_factory ) {
163 $self->sequence_factory(Bio::Seq::SeqFactory->new
164 (-verbose => $self->verbose(),
165 -type => 'Bio::Seq::RichSeq'));
169 =head2 next_seq
171 Title : next_seq
172 Usage : $seq = $stream->next_seq()
173 Function: returns the next sequence in the stream
174 Returns : Bio::Seq object
175 Args :
177 =cut
179 sub next_seq {
180 my ($self,@args) = @_;
181 my ($pseq,$c,$line,$name,$desc,$acc,$seqc,$mol,$div,
182 $date, $comment, @date_arr);
184 my ($annotation, %params, @features) =
185 Bio::Annotation::Collection->new();
187 $line = $self->_readline;
188 # This needs to be before the first eof() test
190 if ( !defined $line ) {
191 return; # no throws - end of file
194 if ( $line =~ /^\s+$/ ) {
195 while ( defined ($line = $self->_readline) ) {
196 $line =~/^\S/ && last;
198 # return without error if the whole next sequence was just a single
199 # blank line and then eof
200 return unless $line;
203 # no ID as 1st non-blank line, need short circuit and exit routine
204 $self->throw("EMBL stream with no ID. Not embl in my book")
205 unless $line =~ /^ID\s+\S+/;
207 # At this point we are sure that $line contains an ID header line
208 my $alphabet;
209 if ( $line =~ tr/;/;/ == 6) { # New style headers contain exactly six semicolons.
211 # New style header (EMBL Release >= 87, after June 2006)
212 my $topology;
213 my $sv;
215 # ID DQ299383; SV 1; linear; mRNA; STD; MAM; 431 BP.
216 # This regexp comes from the new2old.pl conversion script, from EBI
217 if ($line =~ m/^ID (\S+);\s+SV (\d+); (\w+); ([^;]+); (\w{3}); (\w{3}); (\d+) BP./) {
218 ($name, $sv, $topology, $mol, $div) = ($1, $2, $3, $4, $6);
220 if (defined $sv) {
221 $params{'-seq_version'} = $sv;
222 $params{'-version'} = $sv;
225 if (defined $topology && $topology eq 'circular') {
226 $params{'-is_circular'} = 1;
229 if (defined $mol ) {
230 if ($mol =~ /DNA/) {
231 $alphabet = 'dna';
232 } elsif ($mol =~ /RNA/) {
233 $alphabet = 'rna';
234 } elsif ($mol =~ /AA/) {
235 $alphabet = 'protein';
238 } else {
240 # Old style header (EMBL Release < 87, before June 2006)
241 if ($line =~ /^ID\s+(\S+)[^;]*;\s+(\S+)[^;]*;\s+(\S+)[^;]*;/) {
242 ($name, $mol, $div) = ($1, $2, $3);
245 if ($mol) {
246 if ( $mol =~ /circular/ ) {
247 $params{'-is_circular'} = 1;
248 $mol =~ s|circular ||;
250 if (defined $mol ) {
251 if ($mol =~ /DNA/) {
252 $alphabet='dna';
253 } elsif ($mol =~ /RNA/) {
254 $alphabet='rna';
255 } elsif ($mol =~ /AA/) {
256 $alphabet='protein';
262 unless( defined $name && length($name) ) {
263 $name = "unknown_id";
266 # $self->warn("not parsing upper annotation in EMBL file yet!");
267 my $buffer = $line;
268 local $_;
269 BEFORE_FEATURE_TABLE :
270 my $ncbi_taxid;
271 until ( !defined $buffer ) {
272 $_ = $buffer;
273 # Exit at start of Feature table
274 if ( /^(F[HT]|SQ)/ ) {
275 $self->_pushback($_) if( $1 eq 'SQ' || $1 eq 'FT');
276 last;
278 # Description line(s)
279 if (/^DE\s+(\S.*\S)/) {
280 $desc .= $desc ? " $1" : $1;
283 #accession number
284 if ( /^AC\s+(.*)?/ || /^PA\s+(.*)?/) {
285 my @accs = split(/[; ]+/, $1); # allow space in addition
286 $params{'-accession_number'} = shift @accs
287 unless defined $params{'-accession_number'};
288 push @{$params{'-secondary_accessions'}}, @accs;
291 #version number
292 if ( /^SV\s+\S+\.(\d+);?/ ) {
293 my $sv = $1;
294 #$sv =~ s/\;//;
295 $params{'-seq_version'} = $sv;
296 $params{'-version'} = $sv;
299 #date (NOTE: takes last date line)
300 if ( /^DT\s+(.+)$/ ) {
301 my $line = $1;
302 my ($date, $version) = split(' ', $line, 2);
303 $date =~ tr/,//d; # remove comma if new version
304 if ($version) {
305 if ($version =~ /\(Rel\. (\d+), Created\)/xms ) {
306 my $release = Bio::Annotation::SimpleValue->new(
307 -tagname => 'creation_release',
308 -value => $1
310 $annotation->add_Annotation($release);
311 } elsif ($version =~ /\(Rel\. (\d+), Last updated, Version (\d+)\)/xms ) {
312 my $release = Bio::Annotation::SimpleValue->new(
313 -tagname => 'update_release',
314 -value => $1
316 $annotation->add_Annotation($release);
318 my $update = Bio::Annotation::SimpleValue->new(
319 -tagname => 'update_version',
320 -value => $2
322 $annotation->add_Annotation($update);
325 push @{$params{'-dates'}}, $date;
328 #keywords
329 if ( /^KW (.*)\S*$/ ) {
330 my @kw = split(/\s*\;\s*/,$1);
331 push @{$params{'-keywords'}}, @kw;
334 # Organism name and phylogenetic information
335 elsif (/^O[SC]/) {
336 # pass the accession number so we can give an informative throw message if necessary
337 my $species = $self->_read_EMBL_Species(\$buffer, $params{'-accession_number'});
338 $params{'-species'}= $species;
341 # NCBI TaxID Xref
342 elsif (/^OX/) {
343 if (/NCBI_TaxID=(\d+)/) {
344 $ncbi_taxid=$1;
347 my @links = $self->_read_EMBL_TaxID_DBLink(\$buffer);
348 foreach my $dblink ( @links ) {
349 $annotation->add_Annotation('dblink',$dblink);
353 # References
354 elsif (/^R/) {
355 my @refs = $self->_read_EMBL_References(\$buffer);
356 foreach my $ref ( @refs ) {
357 $annotation->add_Annotation('reference',$ref);
361 # DB Xrefs
362 elsif (/^DR/) {
363 my @links = $self->_read_EMBL_DBLink(\$buffer);
364 foreach my $dblink ( @links ) {
365 $annotation->add_Annotation('dblink',$dblink);
369 # Comments
370 elsif (/^CC\s+(.*)/) {
371 $comment .= $1;
372 $comment .= " ";
373 while (defined ($_ = $self->_readline) ) {
374 if (/^CC\s+(.*)/) {
375 $comment .= $1;
376 $comment .= " ";
377 } else {
378 last;
381 my $commobj = Bio::Annotation::Comment->new();
382 $commobj->text($comment);
383 $annotation->add_Annotation('comment',$commobj);
384 $comment = "";
387 # Get next line.
388 $buffer = $self->_readline;
391 while ( defined ($_ = $self->_readline) ) {
392 /^FT\s{3}\w/ && last;
393 /^SQ / && last;
394 /^CO / && last;
396 $buffer = $_;
398 if (defined($buffer) && $buffer =~ /^FT /) {
399 until ( !defined ($buffer) ) {
400 my $ftunit = $self->_read_FTHelper_EMBL(\$buffer);
402 # process ftunit
403 my $feat =
404 $ftunit->_generic_seqfeature($self->location_factory(), $name);
406 # add taxon_id from source if available
407 # Notice, this will override what is found in the OX line.
408 # this is by design as this seems to be the official way
409 # of specifying a TaxID
410 if ($params{'-species'} && ($feat->primary_tag eq 'source')
411 && $feat->has_tag('db_xref')
412 && (! $params{'-species'}->ncbi_taxid())) {
413 foreach my $tagval ($feat->get_tag_values('db_xref')) {
414 if (index($tagval,"taxon:") == 0) {
415 $params{'-species'}->ncbi_taxid(substr($tagval,6));
416 last;
421 # add feature to list of features
422 push(@features, $feat);
424 if ( $buffer !~ /^FT/ ) {
425 last;
429 # Set taxid found in OX line
430 if ($params{'-species'} && defined $ncbi_taxid
431 && (! $params{'-species'}->ncbi_taxid())) {
432 $params{'-species'}->ncbi_taxid($ncbi_taxid);
435 # skip comments
436 while ( defined ($buffer) && $buffer =~ /^XX/ ) {
437 $buffer = $self->_readline();
440 if ( $buffer =~ /^CO/ ) {
441 # bug#2982
442 # special : create contig as annotation
443 while ( defined ($buffer) ) {
444 $annotation->add_Annotation($_) for $self->_read_EMBL_Contig(\$buffer);
445 if ( !$buffer || $buffer !~ /^CO/ ) {
446 last;
449 $buffer ||= '';
451 if ($buffer !~ /^\/\//) { # if no SQ lines following CO (bug#2958)
452 if ( $buffer !~ /^SQ/ ) {
453 while ( defined ($_ = $self->_readline) ) {
454 /^SQ/ && last;
457 $seqc = "";
458 while ( defined ($_ = $self->_readline) ) {
459 m{^//} && last;
460 $_ = uc($_);
461 s/[^A-Za-z]//g;
462 $seqc .= $_;
465 my $seq = $self->sequence_factory->create
466 (-verbose => $self->verbose(),
467 -division => $div,
468 -seq => $seqc,
469 -desc => $desc,
470 -display_id => $name,
471 -annotation => $annotation,
472 -molecule => $mol,
473 -alphabet => $alphabet,
474 -features => \@features,
475 %params);
476 return $seq;
481 =head2 _write_ID_line
483 Title : _write_ID_line
484 Usage : $self->_write_ID_line($seq);
485 Function: Writes the EMBL Release 87 format ID line to the stream, unless
486 : there is a user-supplied ID line generation function in which
487 : case that is used instead.
488 : ( See Bio::SeqIO::embl::_id_generation_function(). )
489 Returns : nothing
490 Args : Bio::Seq object
492 =cut
494 sub _write_ID_line {
496 my ($self, $seq) = @_;
498 my $id_line;
499 # If there is a user-supplied ID generation function, use it.
500 if ( $self->_id_generation_func ) {
501 $id_line = "ID " . &{$self->_id_generation_func}($seq) . "\nXX\n";
503 # Otherwise, generate a standard EMBL release 87 (June 2006) ID line.
504 else {
506 # The sequence name is supposed to be the primary accession number,
507 my $name = $seq->accession_number();
508 if ( not(defined $name) || $name eq 'unknown') {
509 # but if it is not present, use the sequence ID or the empty string
510 $name = $seq->id() || '';
513 $self->warn("No whitespace allowed in EMBL id [". $name. "]") if $name =~ /\s/;
515 # Use the sequence version, or default to 1.
516 my $version = $seq->version() || 1;
518 my $len = $seq->length();
520 # Taxonomic division.
521 my $div;
522 if ( $seq->can('division') && defined($seq->division) &&
523 $self->_is_valid_division($seq->division) ) {
524 $div = $seq->division();
525 } else {
526 $div ||= 'UNC'; # 'UNC' is the EMBL division code for 'unclassified'.
529 my $mol;
530 # If the molecule type is a valid EMBL type, use it.
531 if ( $seq->can('molecule')
532 && defined($seq->molecule)
533 && $self->_is_valid_molecule_type($seq->molecule)
535 $mol = $seq->molecule();
537 # Otherwise, choose unassigned DNA or RNA based on the alphabet.
538 elsif ($seq->can('primary_seq') && defined $seq->primary_seq->alphabet) {
539 my $alphabet =$seq->primary_seq->alphabet;
540 if ($alphabet eq 'dna') {
541 $mol ='unassigned DNA';
542 } elsif ($alphabet eq 'rna') {
543 $mol='unassigned RNA';
544 } elsif ($alphabet eq 'protein') {
545 $self->warn("Protein sequence found; EMBL is a nucleotide format.");
546 $mol='AA'; # AA is not a valid EMBL molecule type.
550 my $topology = 'linear';
551 if ($seq->is_circular) {
552 $topology = 'circular';
555 $mol ||= ''; # 'unassigned'; ?
556 $id_line = "ID $name; SV $version; $topology; $mol; STD; $div; $len BP.\nXX\n";
557 $self->_print($id_line);
561 =head2 _is_valid_division
563 Title : _is_valid_division
564 Usage : $self->_is_valid_division($div)
565 Function: tests division code for validity
566 Returns : true if $div is a valid EMBL release 87 taxonomic division.
567 Args : taxonomic division code string
569 =cut
571 sub _is_valid_division {
572 my ($self, $division) = @_;
574 my %EMBL_divisions = (
575 "PHG" => 1, # Bacteriophage
576 "ENV" => 1, # Environmental Sample
577 "FUN" => 1, # Fungal
578 "HUM" => 1, # Human
579 "INV" => 1, # Invertebrate
580 "MAM" => 1, # Other Mammal
581 "VRT" => 1, # Other Vertebrate
582 "MUS" => 1, # Mus musculus
583 "PLN" => 1, # Plant
584 "PRO" => 1, # Prokaryote
585 "ROD" => 1, # Other Rodent
586 "SYN" => 1, # Synthetic
587 "UNC" => 1, # Unclassified
588 "VRL" => 1 # Viral
591 return exists($EMBL_divisions{$division});
594 =head2 _is_valid_molecule_type
596 Title : _is_valid_molecule_type
597 Usage : $self->_is_valid_molecule_type($mol)
598 Function: tests molecule type for validity
599 Returns : true if $mol is a valid EMBL release 87 molecule type.
600 Args : molecule type string
602 =cut
604 sub _is_valid_molecule_type {
605 my ($self, $moltype) = @_;
607 my %EMBL_molecule_types = (
608 "genomic DNA" => 1,
609 "genomic RNA" => 1,
610 "mRNA" => 1,
611 "tRNA" => 1,
612 "rRNA" => 1,
613 "snoRNA" => 1,
614 "snRNA" => 1,
615 "scRNA" => 1,
616 "pre-RNA" => 1,
617 "other RNA" => 1,
618 "other DNA" => 1,
619 "unassigned DNA" => 1,
620 "unassigned RNA" => 1
623 return exists($EMBL_molecule_types{$moltype});
626 =head2 write_seq
628 Title : write_seq
629 Usage : $stream->write_seq($seq)
630 Function: writes the $seq object (must be seq) to the stream
631 Returns : 1 for success and undef for error
632 Args : array of 1 to n Bio::SeqI objects
635 =cut
637 sub write_seq {
638 my ($self,@seqs) = @_;
640 foreach my $seq ( @seqs ) {
641 $self->throw("Attempting to write with no seq!") unless defined $seq;
642 unless ( ref $seq && $seq->isa('Bio::SeqI' ) ) {
643 $self->warn("$seq is not a SeqI compliant sequence object!")
644 if $self->verbose >= 0;
645 unless ( ref $seq && $seq->isa('Bio::PrimarySeqI' ) ) {
646 $self->throw("$seq is not a PrimarySeqI compliant sequence object!");
649 my $str = $seq->seq || '';
651 # Write the ID line.
652 $self->_write_ID_line($seq);
655 # Write the accession line if present
656 my( $acc );
658 if ( my $func = $self->_ac_generation_func ) {
659 $acc = &{$func}($seq);
660 } elsif ( $seq->isa('Bio::Seq::RichSeqI') &&
661 defined($seq->accession_number) ) {
662 $acc = $seq->accession_number;
663 $acc = join("; ", $acc, $seq->get_secondary_accessions);
664 } elsif ( $seq->can('accession_number') ) {
665 $acc = $seq->accession_number;
668 if (defined $acc) {
669 $self->_print("AC $acc;\n",
670 "XX\n") || return;
674 # Date lines
675 my $switch=0;
676 if ( $seq->can('get_dates') ) {
677 my @dates = $seq->get_dates();
678 my $ct = 1;
679 my $date_flag = 0;
680 my ($cr) = $seq->annotation->get_Annotations("creation_release");
681 my ($ur) = $seq->annotation->get_Annotations("update_release");
682 my ($uv) = $seq->annotation->get_Annotations("update_version");
684 unless ($cr && $ur && $ur) {
685 $date_flag = 1;
688 foreach my $dt (@dates) {
689 if (!$date_flag) {
690 $self->_write_line_EMBL_regex("DT ","DT ",
691 $dt." (Rel. $cr, Created)",
692 '\s+|$',80) if $ct == 1;
693 $self->_write_line_EMBL_regex("DT ","DT ",
694 $dt." (Rel. $ur, Last updated, Version $uv)",
695 '\s+|$',80) if $ct == 2;
696 } else { # other formats?
697 $self->_write_line_EMBL_regex("DT ","DT ",
698 $dt,'\s+|$',80);
700 $switch =1;
701 $ct++;
703 if ($switch == 1) {
704 $self->_print("XX\n") || return;
708 # Description lines
709 $self->_write_line_EMBL_regex("DE ","DE ",$seq->desc(),'\s+|$',80) || return; #'
710 $self->_print( "XX\n") || return;
712 # if there, write the kw line
714 my( $kw );
715 if ( my $func = $self->_kw_generation_func ) {
716 $kw = &{$func}($seq);
717 } elsif ( $seq->can('keywords') ) {
718 $kw = $seq->keywords;
720 if (defined $kw) {
721 $self->_write_line_EMBL_regex("KW ", "KW ", $kw, '\s+|$', 80) || return; #'
722 $self->_print( "XX\n") || return;
726 # Organism lines
728 if ($seq->can('species') && (my $spec = $seq->species)) {
729 my @class = $spec->classification();
730 shift @class; # get rid of species name. Some embl files include
731 # the species name in the OC lines, but this seems
732 # more like an error than something we need to
733 # emulate
734 my $OS = $spec->scientific_name;
735 if ($spec->common_name) {
736 $OS .= ' ('.$spec->common_name.')';
738 $self->_print("OS $OS\n") || return;
739 my $OC = join('; ', reverse(@class)) .'.';
740 $self->_write_line_EMBL_regex("OC ","OC ",$OC,'; |$',80) || return;
741 if ($spec->organelle) {
742 $self->_write_line_EMBL_regex("OG ","OG ",$spec->organelle,'; |$',80) || return;
744 my $ncbi_taxid = $spec->ncbi_taxid;
745 if ($ncbi_taxid) {
746 $self->_print("OX NCBI_TaxID=$ncbi_taxid\n") || return;
748 $self->_print("XX\n") || return;
750 # Reference lines
751 my $t = 1;
752 if ( $seq->can('annotation') && defined $seq->annotation ) {
753 foreach my $ref ( $seq->annotation->get_Annotations('reference') ) {
754 $self->_print( "RN [$t]\n") || return;
756 # Having no RP line is legal, but we need both
757 # start and end for a valid location.
758 if ($ref->comment) {
759 $self->_write_line_EMBL_regex("RC ", "RC ", $ref->comment, '\s+|$', 80) || return; #'
761 my $start = $ref->start;
762 my $end = $ref->end;
763 if ($start and $end) {
764 $self->_print( "RP $start-$end\n") || return;
765 } elsif ($start or $end) {
766 $self->throw("Both start and end are needed for a valid RP line.".
767 " Got: start='$start' end='$end'");
770 if (my $med = $ref->medline) {
771 $self->_print( "RX MEDLINE; $med.\n") || return;
773 if (my $pm = $ref->pubmed) {
774 $self->_print( "RX PUBMED; $pm.\n") || return;
776 my $authors = $ref->authors;
777 $authors =~ s/([\w\.]) (\w)/$1#$2/g; # add word wrap protection char '#'
779 $self->_write_line_EMBL_regex("RA ", "RA ",
780 $authors . ";",
781 '\s+|$', 80) || return; #'
783 # If there is no title to the reference, it appears
784 # as a single semi-colon. All titles must end in
785 # a semi-colon.
786 my $ref_title = $ref->title || '';
787 $ref_title =~ s/[\s;]*$/;/;
788 $self->_write_line_EMBL_regex("RT ", "RT ", $ref_title, '\s+|$', 80) || return; #'
789 $self->_write_line_EMBL_regex("RL ", "RL ", $ref->location, '\s+|$', 80) || return; #'
790 $self->_print("XX\n") || return;
791 $t++;
794 # DB Xref lines
795 if (my @db_xref = $seq->annotation->get_Annotations('dblink') ) {
796 for my $dr (@db_xref) {
797 my $db_name = $dr->database;
798 my $prim = $dr->primary_id;
800 my $opt = $dr->optional_id || '';
801 my $line = $opt ? "$db_name; $prim; $opt." : "$db_name; $prim.";
802 $self->_write_line_EMBL_regex("DR ", "DR ", $line, '\s+|$', 80) || return; #'
804 $self->_print("XX\n") || return;
807 # Comment lines
808 foreach my $comment ( $seq->annotation->get_Annotations('comment') ) {
809 $self->_write_line_EMBL_regex("CC ", "CC ", $comment->text, '\s+|$', 80) || return; #'
810 $self->_print("XX\n") || return;
813 # "\\s\+\|\$"
815 ## FEATURE TABLE
817 $self->_print("FH Key Location/Qualifiers\n") || return;
818 $self->_print("FH\n") || return;
820 my @feats = $seq->can('top_SeqFeatures') ? $seq->top_SeqFeatures : ();
821 if ($feats[0]) {
822 if ( defined $self->_post_sort ) {
823 # we need to read things into an array.
824 # Process. Sort them. Print 'em
826 my $post_sort_func = $self->_post_sort();
827 my @fth;
829 foreach my $sf ( @feats ) {
830 push(@fth,Bio::SeqIO::FTHelper::from_SeqFeature($sf,$seq));
833 @fth = sort { &$post_sort_func($a,$b) } @fth;
835 foreach my $fth ( @fth ) {
836 $self->_print_EMBL_FTHelper($fth) || return;
838 } else {
839 # not post sorted. And so we can print as we get them.
840 # lower memory load...
842 foreach my $sf ( @feats ) {
843 my @fth = Bio::SeqIO::FTHelper::from_SeqFeature($sf,$seq);
844 foreach my $fth ( @fth ) {
845 if ( $fth->key eq 'CONTIG') {
846 $self->_show_dna(0);
848 $self->_print_EMBL_FTHelper($fth) || return;
854 if ( $self->_show_dna() == 0 ) {
855 $self->_print( "//\n") || return;
856 return;
858 $self->_print( "XX\n") || return;
860 # finished printing features.
862 # print contig if present : bug#2982
863 if ( $seq->can('annotation') && defined $seq->annotation) {
864 foreach my $ctg ( $seq->annotation->get_Annotations('contig') ) {
865 if ($ctg->value) {
866 $self->_write_line_EMBL_regex("CO ","CO ", $ctg->value,
867 '[,]|$', 80) || return;
871 # print sequence lines only if sequence is present! bug#2982
872 if (length($str)) {
873 $str =~ tr/A-Z/a-z/;
875 # Count each nucleotide
876 my $alen = $str =~ tr/a/a/;
877 my $clen = $str =~ tr/c/c/;
878 my $glen = $str =~ tr/g/g/;
879 my $tlen = $str =~ tr/t/t/;
881 my $len = $seq->length();
882 my $olen = $seq->length() - ($alen + $tlen + $clen + $glen);
883 if ( $olen < 0 ) {
884 $self->warn("Weird. More atgc than bases. Problem!");
887 $self->_print("SQ Sequence $len BP; $alen A; $clen C; $glen G; $tlen T; $olen other;\n") || return;
889 my $nuc = 60; # Number of nucleotides per line
890 my $whole_pat = 'a10' x 6; # Pattern for unpacking a whole line
891 my $out_pat = 'A11' x 6; # Pattern for packing a line
892 my $length = length($str);
894 # Calculate the number of nucleotides which fit on whole lines
895 my $whole = int($length / $nuc) * $nuc;
897 # Print the whole lines
898 my( $i );
899 for ($i = 0; $i < $whole; $i += $nuc) {
900 my $blocks = pack $out_pat,
901 unpack $whole_pat,
902 substr($str, $i, $nuc);
903 $self->_print(sprintf(" $blocks%9d\n", $i + $nuc)) || return;
906 # Print the last line
907 if (my $last = substr($str, $i)) {
908 my $last_len = length($last);
909 my $last_pat = 'a10' x int($last_len / 10) .'a'. $last_len % 10;
910 my $blocks = pack $out_pat,
911 unpack($last_pat, $last);
912 $self->_print(sprintf(" $blocks%9d\n", $length)) ||
913 return; # Add the length to the end
917 $self->_print( "//\n") || return;
919 $self->flush if $self->_flush_on_write && defined $self->_fh;
921 return 1;
924 =head2 _print_EMBL_FTHelper
926 Title : _print_EMBL_FTHelper
927 Usage :
928 Function: Internal function
929 Returns : 1 if writing suceeded, otherwise undef
930 Args :
933 =cut
935 sub _print_EMBL_FTHelper {
936 my ($self,$fth) = @_;
938 if ( ! ref $fth || ! $fth->isa('Bio::SeqIO::FTHelper') ) {
939 $fth->warn("$fth is not a FTHelper class. Attempting to print, but there could be tears!");
943 #$self->_print( "FH Key Location/Qualifiers\n");
944 #$self->_print( sprintf("FT %-15s %s\n",$fth->key,$fth->loc));
945 # let
946 if ( $fth->key eq 'CONTIG' ) {
947 $self->_print("XX\n") || return;
948 $self->_write_line_EMBL_regex("CO ",
949 "CO ",$fth->loc,
950 '\,|$',80) || return; #'
951 return 1;
953 $self->_write_line_EMBL_regex(sprintf("FT %-15s ",$fth->key),
954 "FT ",$fth->loc,
955 '\,|$',80) || return; #'
956 foreach my $tag ( keys %{$fth->field} ) {
957 if ( ! defined $fth->field->{$tag} ) {
958 next;
960 foreach my $value ( @{$fth->field->{$tag}} ) {
961 $value =~ s/\"/\"\"/g;
962 if ($value eq "_no_value") {
963 $self->_write_line_EMBL_regex("FT ",
964 "FT ",
965 "/$tag",'.|$',80) || return; #'
967 # there are almost 3x more quoted qualifier values and they
968 # are more common too so we take quoted ones first
970 # Long qualifiers, that will be line wrapped, are always quoted
971 elsif (!$FTQUAL_NO_QUOTE{$tag} or length("/$tag=$value")>=60) {
972 my $pat = $value =~ /\s/ ? '\s|\-|$' : '.|\-|$';
973 $self->_write_line_EMBL_regex("FT ",
974 "FT ",
975 "/$tag=\"$value\"",$pat,80) || return;
976 } else {
977 $self->_write_line_EMBL_regex("FT ",
978 "FT ",
979 "/$tag=$value",'.|$',80) || return; #'
984 return 1;
989 =head2 _read_EMBL_Contig()
991 Title : _read_EMBL_Contig
992 Usage :
993 Function: convert CO lines into annotations
994 Returns :
995 Args :
997 =cut
999 sub _read_EMBL_Contig {
1000 my ($self, $buffer) = @_;
1001 my @ret;
1002 if ( $$buffer !~ /^CO/ ) {
1003 warn("Not parsing line '$$buffer' which maybe important");
1005 $self->_pushback($$buffer);
1006 while ( defined ($_ = $self->_readline) ) {
1007 /^C/ || last;
1008 /^CO\s+(.*)/ && do {
1009 push @ret, Bio::Annotation::SimpleValue->new( -tagname => 'contig',
1010 -value => $1);
1013 $$buffer = $_;
1014 return @ret;
1018 =head2 _read_EMBL_References
1020 Title : _read_EMBL_References
1021 Usage :
1022 Function: Reads references from EMBL format. Internal function really
1023 Example :
1024 Returns :
1025 Args :
1028 =cut
1030 sub _read_EMBL_References {
1031 my ($self,$buffer) = @_;
1032 my (@refs);
1034 # assume things are starting with RN
1036 if ( $$buffer !~ /^RN/ ) {
1037 warn("Not parsing line '$$buffer' which maybe important");
1039 my $b1;
1040 my $b2;
1041 my $title;
1042 my $loc;
1043 my $au;
1044 my $med;
1045 my $pm;
1046 my $com;
1048 while ( defined ($_ = $self->_readline) ) {
1049 /^R/ || last;
1050 /^RP (\d+)-(\d+)/ && do {$b1=$1;$b2=$2;};
1051 /^RX MEDLINE;\s+(\d+)/ && do {$med=$1};
1052 /^RX PUBMED;\s+(\d+)/ && do {$pm=$1};
1053 /^RA (.*)/ && do {
1054 $au = $self->_concatenate_lines($au,$1); next;
1056 /^RT (.*)/ && do {
1057 $title = $self->_concatenate_lines($title,$1); next;
1059 /^RL (.*)/ && do {
1060 $loc = $self->_concatenate_lines($loc,$1); next;
1062 /^RC (.*)/ && do {
1063 $com = $self->_concatenate_lines($com,$1); next;
1067 my $ref = Bio::Annotation::Reference->new();
1068 $au =~ s/;\s*$//g;
1069 $title =~ s/;\s*$//g;
1071 $ref->start($b1);
1072 $ref->end($b2);
1073 $ref->authors($au);
1074 $ref->title($title);
1075 $ref->location($loc);
1076 $ref->medline($med);
1077 $ref->comment($com);
1078 $ref->pubmed($pm);
1080 push(@refs,$ref);
1081 $$buffer = $_;
1083 return @refs;
1086 =head2 _read_EMBL_Species
1088 Title : _read_EMBL_Species
1089 Usage :
1090 Function: Reads the EMBL Organism species and classification
1091 lines.
1092 Example :
1093 Returns : A Bio::Species object
1094 Args : a reference to the current line buffer, accession number
1096 =cut
1098 sub _read_EMBL_Species {
1099 my( $self, $buffer, $acc ) = @_;
1100 my $org;
1102 $_ = $$buffer;
1103 my( $sub_species, $species, $genus, $common, $sci_name, $class_lines );
1104 while (defined( $_ ||= $self->_readline )) {
1105 if (/^OS\s+(.+)/) {
1106 $sci_name .= ($sci_name) ? ' '.$1 : $1;
1107 } elsif (s/^OC\s+(.+)$//) {
1108 $class_lines .= $1;
1109 } elsif (/^OG\s+(.*)/) {
1110 $org = $1;
1111 } else {
1112 last;
1115 $_ = undef; # Empty $_ to trigger read of next line
1118 # $$buffer = $_;
1119 $self->_pushback($_);
1121 $sci_name =~ s{\.$}{};
1122 $sci_name || return;
1124 # Convert data in classification lines into classification array.
1125 # only split on ';' or '.' so that classification that is 2 or more words
1126 # will still get matched, use map() to remove trailing/leading/intervening
1127 # spaces
1128 my @class = map { s/^\s+//; s/\s+$//; s/\s{2,}/ /g; $_; } split /(?<!subgen)[;\.]+/, $class_lines;
1130 # do we have a genus?
1131 my $possible_genus = $class[-1];
1132 $possible_genus .= "|$class[-2]" if $class[-2];
1133 if ($sci_name =~ /^($possible_genus)/) {
1134 $genus = $1;
1135 ($species) = $sci_name =~ /^$genus\s+(.+)/;
1136 } else {
1137 $species = $sci_name;
1140 # Don't make a species object if it is "Unknown" or "None"
1141 if ($genus) {
1142 return if $genus =~ /^(Unknown|None)$/i;
1145 # is this organism of rank species or is it lower?
1146 # (doesn't catch everything, but at least the guess isn't dangerous)
1147 if ($species =~ /subsp\.|var\./) {
1148 ($species, $sub_species) = $species =~ /(.+)\s+((?:subsp\.|var\.).+)/;
1151 # sometimes things have common name in brackets, like
1152 # Schizosaccharomyces pombe (fission yeast), so get rid of the common
1153 # name bit. Probably dangerous if real scientific species name ends in
1154 # bracketed bit.
1155 unless ($class[-1] eq 'Viruses') {
1156 ($species, $common) = $species =~ /^(.+)\s+\((.+)\)$/;
1157 $sci_name =~ s/\s+\(.+\)$// if $common;
1160 # Bio::Species array needs array in Species -> Kingdom direction
1161 unless ($class[-1] eq $sci_name) {
1162 push(@class, $sci_name);
1164 @class = reverse @class;
1166 # do minimal sanity checks before we hand off to Bio::Species which won't
1167 # be able to give informative throw messages if it has to throw because
1168 # of problems here
1169 $self->throw("$acc seems to be missing its OS line: invalid.") unless $sci_name;
1170 my %names;
1171 foreach my $i (0..$#class) {
1172 my $name = $class[$i];
1173 $names{$name}++;
1174 # this code breaks examples like: Xenopus (Silurana) tropicalis
1175 # commenting out, see bug 3158
1177 #if ($names{$name} > 1 && ($name ne $class[$i - 1])) {
1178 # $self->warn("$acc seems to have an invalid species classification:$name ne $class[$i - 1]");
1181 my $make = Bio::Species->new();
1182 $make->scientific_name($sci_name);
1183 $make->classification(@class);
1184 unless ($class[-1] eq 'Viruses') {
1185 $make->genus($genus) if $genus;
1186 $make->species($species) if $species;
1187 $make->sub_species($sub_species) if $sub_species;
1188 $make->common_name($common) if $common;
1190 $make->organelle($org) if $org;
1191 return $make;
1194 =head2 _read_EMBL_DBLink
1196 Title : _read_EMBL_DBLink
1197 Usage :
1198 Function: Reads the EMBL database cross reference ("DR") lines
1199 Example :
1200 Returns : A list of Bio::Annotation::DBLink objects
1201 Args :
1203 =cut
1205 sub _read_EMBL_DBLink {
1206 my( $self,$buffer ) = @_;
1207 my( @db_link );
1209 $_ = $$buffer;
1210 while (defined( $_ ||= $self->_readline )) {
1211 if ( /^DR ([^\s;]+);\s*([^\s;]+);?\s*([^\s;]+)?\.$/) {
1212 my ($databse, $prim_id, $sec_id) = ($1,$2,$3);
1213 my $link = Bio::Annotation::DBLink->new(-database => $databse,
1214 -primary_id => $prim_id,
1215 -optional_id => $sec_id);
1217 push(@db_link, $link);
1218 } else {
1219 last;
1221 $_ = undef; # Empty $_ to trigger read of next line
1224 $$buffer = $_;
1225 return @db_link;
1228 =head2 _read_EMBL_TaxID_DBLink
1230 Title : _read_EMBL_TaxID_DBLink
1231 Usage :
1232 Function: Reads the EMBL database cross reference to NCBI TaxID ("OX") lines
1233 Example :
1234 Returns : A list of Bio::Annotation::DBLink objects
1235 Args :
1237 =cut
1239 sub _read_EMBL_TaxID_DBLink {
1240 my( $self,$buffer ) = @_;
1241 my( @db_link );
1243 $_ = $$buffer;
1244 while (defined( $_ ||= $self->_readline )) {
1245 if ( /^OX (\S+)=(\d+);$/ ) {
1246 my ($databse, $prim_id) = ($1,$2);
1247 my $link = Bio::Annotation::DBLink->new(-database => $databse,
1248 -primary_id => $prim_id,);
1249 push(@db_link, $link);
1250 } else {
1251 last;
1253 $_ = undef; # Empty $_ to trigger read of next line
1256 $$buffer = $_;
1257 return @db_link;
1260 =head2 _filehandle
1262 Title : _filehandle
1263 Usage : $obj->_filehandle($newval)
1264 Function:
1265 Example :
1266 Returns : value of _filehandle
1267 Args : newvalue (optional)
1270 =cut
1272 sub _filehandle{
1273 my ($obj,$value) = @_;
1274 if ( defined $value) {
1275 $obj->{'_filehandle'} = $value;
1277 return $obj->{'_filehandle'};
1281 =head2 _read_FTHelper_EMBL
1283 Title : _read_FTHelper_EMBL
1284 Usage : _read_FTHelper_EMBL($buffer)
1285 Function: reads the next FT key line
1286 Example :
1287 Returns : Bio::SeqIO::FTHelper object
1288 Args : filehandle and reference to a scalar
1291 =cut
1293 sub _read_FTHelper_EMBL {
1294 my ($self,$buffer) = @_;
1296 my ($key, # The key of the feature
1297 $loc, # The location line from the feature
1298 @qual, # An arrray of lines making up the qualifiers
1301 if ($$buffer =~ /^FT\s{3}(\S+)\s+(\S+)/ ) {
1302 $key = $1;
1303 $loc = $2;
1304 # Read all the lines up to the next feature
1305 while ( defined($_ = $self->_readline) ) {
1306 if (/^FT(\s+)(.+?)\s*$/) {
1307 # Lines inside features are preceeded by 19 spaces
1308 # A new feature is preceeded by 3 spaces
1309 if (length($1) > 4) {
1310 # Add to qualifiers if we're in the qualifiers
1311 if (@qual) {
1312 push(@qual, $2);
1314 # Start the qualifier list if it's the first qualifier
1315 elsif (substr($2, 0, 1) eq '/') {
1316 @qual = ($2);
1318 # We're still in the location line, so append to location
1319 else {
1320 $loc .= $2;
1322 } else {
1323 # We've reached the start of the next feature
1324 last;
1326 } else {
1327 # We're at the end of the feature table
1328 last;
1331 } elsif ( $$buffer =~ /^CO\s+(\S+)/) {
1332 $key = 'CONTIG';
1333 $loc = $1;
1334 # Read all the lines up to the next feature
1335 while ( defined($_ = $self->_readline) ) {
1336 if (/^CO\s+(\S+)\s*$/) {
1337 $loc .= $1;
1338 } else {
1339 # We've reached the start of the next feature
1340 last;
1343 } else {
1344 # No feature key
1345 return;
1348 # Put the first line of the next feature into the buffer
1349 $$buffer = $_;
1351 # Make the new FTHelper object
1352 my $out = Bio::SeqIO::FTHelper->new();
1353 $out->verbose($self->verbose());
1354 $out->key($key);
1355 $out->loc($loc);
1357 # Now parse and add any qualifiers. (@qual is kept
1358 # intact to provide informative error messages.)
1359 QUAL: for (my $i = 0; $i < @qual; $i++) {
1360 $_ = $qual[$i];
1361 my( $qualifier, $value ) = m{^/([^=]+)(?:=(.+))?}
1362 or $self->throw("Can't see new qualifier in: $_\nfrom:\n"
1363 . join('', map "$_\n", @qual));
1364 if (defined $value) {
1365 # Do we have a quoted value?
1366 if (substr($value, 0, 1) eq '"') {
1367 # Keep adding to value until we find the trailing quote
1368 # and the quotes are balanced
1369 QUOTES:
1370 while ($value !~ /"$/ or $value =~ tr/"/"/ % 2) { #"
1371 $i++;
1372 my $next = $qual[$i];
1373 if (!defined($next)) {
1374 $self->warn("Unbalanced quote in:\n".join("\n", @qual).
1375 "\nAdding quote to close...".
1376 "Check sequence quality!");
1377 $value .= '"';
1378 last QUOTES;
1381 # Protein sequence translations need to be joined without spaces,
1382 # other qualifiers need those.
1383 if ($qualifier eq "translation") {
1384 $value .= $next;
1385 } else {
1386 $value .= " $next";
1389 # Trim leading and trailing quotes
1390 $value =~ s/^"|"$//g;
1391 # Undouble internal quotes
1392 $value =~ s/""/"/g; #"
1394 } else {
1395 $value = '_no_value';
1398 # Store the qualifier
1399 $out->field->{$qualifier} ||= [];
1400 push(@{$out->field->{$qualifier}},$value);
1403 return $out;
1406 =head2 _write_line_EMBL
1408 Title : _write_line_EMBL
1409 Usage :
1410 Function: internal function
1411 Example :
1412 Returns : 1 if writing suceeded, else undef
1413 Args :
1416 =cut
1418 sub _write_line_EMBL {
1419 my ($self,$pre1,$pre2,$line,$length) = @_;
1421 $length || $self->throw("Miscalled write_line_EMBL without length. Programming error!");
1422 my $subl = $length - length $pre2;
1423 my $linel = length $line;
1424 my $i;
1426 my $sub = substr($line,0,$length - length $pre1);
1428 $self->_print( "$pre1$sub\n") || return;
1430 for ($i= ($length - length $pre1);$i < $linel;) {
1431 $sub = substr($line,$i,($subl));
1432 $self->_print( "$pre2$sub\n") || return;
1433 $i += $subl;
1436 return 1;
1439 =head2 _write_line_EMBL_regex
1441 Title : _write_line_EMBL_regex
1442 Usage :
1443 Function: internal function for writing lines of specified
1444 length, with different first and the next line
1445 left hand headers and split at specific points in the
1446 text
1447 Example :
1448 Returns : nothing
1449 Args : file handle, first header, second header, text-line, regex for line breaks, total line length
1452 =cut
1454 sub _write_line_EMBL_regex {
1455 my ($self,$pre1,$pre2,$line,$regex,$length) = @_;
1457 #print STDOUT "Going to print with $line!\n";
1459 $length || $self->throw("Programming error - called write_line_EMBL_regex without length.");
1461 my $subl = $length - (length $pre1) -1 ;
1462 my( @lines );
1464 CHUNK: while($line) {
1465 foreach my $pat ($regex, '[,;\.\/-]\s|'.$regex, '[,;\.\/-]|'.$regex) {
1466 if ($line =~ m/^(.{0,$subl})($pat)(.*)/ ) {
1467 my $l = $1.$2;
1468 $l =~ s/#/ /g # remove word wrap protection char '#'
1469 if $pre1 eq "RA ";
1470 my $newl = $3;
1471 $line = substr($line,length($l));
1472 # be strict about not padding spaces according to
1473 # genbank format
1474 $l =~ s/\s+$//;
1475 next CHUNK if ($l eq '');
1476 push(@lines, $l);
1477 next CHUNK;
1480 # if we get here none of the patterns matched $subl or less chars
1481 $self->warn("trouble dissecting \"$line\"\n into chunks ".
1482 "of $subl chars or less - this tag won't print right");
1483 # insert a space char to prevent infinite loops
1484 $line = substr($line,0,$subl) . " " . substr($line,$subl);
1486 my $s = shift @lines;
1487 ($self->_print("$pre1$s\n") || return) if $s;
1488 foreach my $s ( @lines ) {
1489 $self->_print("$pre2$s\n") || return;
1492 return 1;
1495 =head2 _post_sort
1497 Title : _post_sort
1498 Usage : $obj->_post_sort($newval)
1499 Function:
1500 Returns : value of _post_sort
1501 Args : newvalue (optional)
1504 =cut
1506 sub _post_sort{
1507 my $obj = shift;
1508 if ( @_ ) {
1509 my $value = shift;
1510 $obj->{'_post_sort'} = $value;
1512 return $obj->{'_post_sort'};
1516 =head2 _show_dna
1518 Title : _show_dna
1519 Usage : $obj->_show_dna($newval)
1520 Function:
1521 Returns : value of _show_dna
1522 Args : newvalue (optional)
1525 =cut
1527 sub _show_dna{
1528 my $obj = shift;
1529 if ( @_ ) {
1530 my $value = shift;
1531 $obj->{'_show_dna'} = $value;
1533 return $obj->{'_show_dna'};
1537 =head2 _id_generation_func
1539 Title : _id_generation_func
1540 Usage : $obj->_id_generation_func($newval)
1541 Function:
1542 Returns : value of _id_generation_func
1543 Args : newvalue (optional)
1546 =cut
1548 sub _id_generation_func{
1549 my $obj = shift;
1550 if ( @_ ) {
1551 my $value = shift;
1552 $obj->{'_id_generation_func'} = $value;
1554 return $obj->{'_id_generation_func'};
1558 =head2 _ac_generation_func
1560 Title : _ac_generation_func
1561 Usage : $obj->_ac_generation_func($newval)
1562 Function:
1563 Returns : value of _ac_generation_func
1564 Args : newvalue (optional)
1567 =cut
1569 sub _ac_generation_func{
1570 my $obj = shift;
1571 if ( @_ ) {
1572 my $value = shift;
1573 $obj->{'_ac_generation_func'} = $value;
1575 return $obj->{'_ac_generation_func'};
1579 =head2 _sv_generation_func
1581 Title : _sv_generation_func
1582 Usage : $obj->_sv_generation_func($newval)
1583 Function:
1584 Returns : value of _sv_generation_func
1585 Args : newvalue (optional)
1588 =cut
1590 sub _sv_generation_func{
1591 my $obj = shift;
1592 if ( @_ ) {
1593 my $value = shift;
1594 $obj->{'_sv_generation_func'} = $value;
1596 return $obj->{'_sv_generation_func'};
1600 =head2 _kw_generation_func
1602 Title : _kw_generation_func
1603 Usage : $obj->_kw_generation_func($newval)
1604 Function:
1605 Returns : value of _kw_generation_func
1606 Args : newvalue (optional)
1609 =cut
1611 sub _kw_generation_func{
1612 my $obj = shift;
1613 if ( @_ ) {
1614 my $value = shift;
1615 $obj->{'_kw_generation_func'} = $value;
1617 return $obj->{'_kw_generation_func'};