changes all issue tracking in preparation for switch to github issues
[bioperl-live.git] / Bio / Phenotype / OMIM / OMIMparser.pm
blob9254b01a480f0822d65d76ec3e5f7068bf6281cc
2 # BioPerl module for Bio::Phenotype::OMIM::OMIMparser
4 # Please direct questions and support issues to <bioperl-l@bioperl.org>
6 # Cared for by Christian M. Zmasek <czmasek-at-burnham.org> or <cmzmasek@yahoo.com>
8 # (c) Christian M. Zmasek, czmasek-at-burnham.org, 2002.
9 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
11 # You may distribute this module under the same terms as perl itself.
12 # Refer to the Perl Artistic License (see the license accompanying this
13 # software package, or see http://www.perl.com/language/misc/Artistic.html)
14 # for the terms under which you may use, modify, and redistribute this module.
16 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
17 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
18 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 # You may distribute this module under the same terms as perl itself
22 # POD documentation - main docs before the code
24 =head1 NAME
26 Bio::Phenotype::OMIM::OMIMparser - parser for the OMIM database
28 =head1 SYNOPSIS
30 use Bio::Phenotype::OMIM::OMIMparser;
32 # The OMIM database is available as textfile at:
33 # ftp://ncbi.nlm.nih.gov/repository/OMIM/omim.txt.Z
34 # The genemap is available as textfile at:
35 # ftp://ncbi.nlm.nih.gov/repository/OMIM/genemap
37 $omim_parser = Bio::Phenotype::OMIM::OMIMparser->new( -genemap => "/path/to/genemap",
38 -omimtext => "/path/to/omim.txt" );
40 while ( my $omim_entry = $omim_parser->next_phenotype() ) {
41 # This prints everything.
42 print( $omim_entry->to_string() );
43 print "\n\n";
45 # This gets individual data (some of them object-arrays)
46 # (and illustrates the relevant methods of OMIMentry).
47 my $numb = $omim_entry->MIM_number(); # *FIELD* NO
48 my $title = $omim_entry->title(); # *FIELD* TI - first line
49 my $alt = $omim_entry->alternative_titles_and_symbols(); # *FIELD* TI - additional lines
50 my $mtt = $omim_entry->more_than_two_genes(); # "#" before title
51 my $sep = $omim_entry->is_separate(); # "*" before title
52 my $desc = $omim_entry->description(); # *FIELD* TX
53 my $mm = $omim_entry->mapping_method(); # from genemap
54 my $gs = $omim_entry->gene_status(); # from genemap
55 my $cr = $omim_entry->created(); # *FIELD* CD
56 my $cont = $omim_entry->contributors(); # *FIELD* CN
57 my $ed = $omim_entry->edited(); # *FIELD* ED
58 my $sa = $omim_entry->additional_references(); # *FIELD* SA
59 my $cs = $omim_entry->clinical_symptoms_raw(); # *FIELD* CS
60 my $comm = $omim_entry->comment(); # from genemap
62 my $mini_mim = $omim_entry->miniMIM(); # *FIELD* MN
63 # A Bio::Phenotype::OMIM::MiniMIMentry object.
64 # class Bio::Phenotype::OMIM::MiniMIMentry
65 # provides the following:
66 # - description()
67 # - created()
68 # - contributors()
69 # - edited()
71 # Prints the contents of the MINI MIM entry (most OMIM entries do
72 # not have MINI MIM entries, though).
73 print $mini_mim->description()."\n";
74 print $mini_mim->created()."\n";
75 print $mini_mim->contributors()."\n";
76 print $mini_mim->edited()."\n";
78 my @corrs = $omim_entry->each_Correlate(); # from genemap
79 # Array of Bio::Phenotype::Correlate objects.
80 # class Bio::Phenotype::Correlate
81 # provides the following:
82 # - name()
83 # - description() (not used)
84 # - species() (always mouse)
85 # - type() ("OMIM mouse correlate")
86 # - comment()
88 my @refs = $omim_entry->each_Reference(); # *FIELD* RF
89 # Array of Bio::Annotation::Reference objects.
92 my @avs = $omim_entry->each_AllelicVariant(); # *FIELD* AV
93 # Array of Bio::Phenotype::OMIM::OMIMentryAllelicVariant objects.
94 # class Bio::Phenotype::OMIM::OMIMentryAllelicVariant
95 # provides the following:
96 # - number (e.g ".0001" )
97 # - title (e.g "ALCOHOL INTOLERANCE" )
98 # - symbol (e.g "ALDH2*2" )
99 # - description (e.g "The ALDH2*2-encoded protein has a change ..." )
100 # - aa_ori (used if information in the form "LYS123ARG" is found)
101 # - aa_mut (used if information in the form "LYS123ARG" is found)
102 # - position (used if information in the form "LYS123ARG" is found)
103 # - additional_mutations (used for e.g. "1-BP DEL, 911T")
105 my @cps = $omim_entry->each_CytoPosition(); # from genemap
106 # Array of Bio::Map::CytoPosition objects.
108 my @gss = $omim_entry->each_gene_symbol(); # from genemap
109 # Array of strings.
111 # do something ...
114 =head1 DESCRIPTION
116 This parser returns Bio::Phenotype::OMIM::OMIMentry objects
117 (which inherit from Bio::Phenotype::PhenotypeI).
118 It parses the OMIM database available as
119 ftp://ncbi.nlm.nih.gov/repository/OMIM/omim.txt.Z
120 together with (optionally) the gene map file at
121 ftp://ncbi.nlm.nih.gov/repository/OMIM/genemap.
124 =head1 FEEDBACK
126 =head2 Mailing Lists
128 User feedback is an integral part of the evolution of this and other
129 Bioperl modules. Send your comments and suggestions preferably to the
130 Bioperl mailing lists. Your participation is much appreciated.
132 bioperl-l@bioperl.org - General discussion
133 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
135 =head2 Support
137 Please direct usage questions or support issues to the mailing list:
139 I<bioperl-l@bioperl.org>
141 rather than to the module maintainer directly. Many experienced and
142 responsive experts will be able to look at the problem and quickly
143 address it. Please include a thorough description of the problem
144 with code and data examples if at all possible.
146 =head2 Reporting Bugs
148 Report bugs to the Bioperl bug tracking system to help us keep track
149 of the bugs and their resolution. Bug reports can be submitted via the
150 web:
152 https://github.com/bioperl/bioperl-live/issues
154 =head1 AUTHOR
156 Christian M. Zmasek
158 Email: czmasek-at-burnham.org or cmzmasek@yahoo.com
160 WWW: http://monochrome-effect.net/
162 Address:
164 Genomics Institute of the Novartis Research Foundation
165 10675 John Jay Hopkins Drive
166 San Diego, CA 92121
168 =head1 APPENDIX
170 The rest of the documentation details each of the object
171 methods. Internal methods are usually preceded with a _
173 =cut
176 # Let the code begin...
179 package Bio::Phenotype::OMIM::OMIMparser;
181 use strict;
183 use Bio::Root::IO;
184 use Bio::Species;
185 use Bio::Annotation::Reference;
186 use Bio::Map::CytoPosition;
187 use Bio::Phenotype::OMIM::OMIMentry;
188 use Bio::Phenotype::OMIM::OMIMentryAllelicVariant;
189 use Bio::Phenotype::Correlate;
191 use base qw(Bio::Root::Root);
# Parser states (one per OMIM text field that is collected), followed by
# the boolean flags used throughout this module.
use constant {
    DEFAULT_STATE               => 0,
    MIM_NUMBER_STATE            => 1,
    TITLE_STATE                 => 2,
    TEXT_STATE                  => 3,
    MINI_MIM_TEXT_STATE         => 4,
    ALLELIC_VARIANT_STATE       => 5,
    SEE_ALSO_STATE              => 6,
    REF_STATE                   => 7,
    SYMPT_STATE                 => 8,
    CONTRIBUTORS_STATE          => 9,
    CREATED_BY_STATE            => 10,
    EDITED_BY_STATE             => 11,
    MINI_MIM_EDITED_BY_STATE    => 12,
    MINI_MIM_CREATED_BY_STATE   => 13,
    MINI_MIM_CONTRIBUTORS_STATE => 14,
    TRUE                        => 1,
    FALSE                       => 0,
};
214 =head2 new
216 Title : new
217 Usage : $omim_parser = Bio::Phenotype::OMIM::OMIMparser->new( -genemap => "/path/to/genemap",
218 -omimtext => "/path/to/omim.txt" );
219 Function: Creates a new OMIMparser.
220 Returns : A new OMIMparser object.
221 Args : -genemap => the genemap file name (optional)
222 -omimtext => the omim text file name
224 =cut
# Constructor. Builds the object through the parent class, resets the
# parser state with init(), and then applies whichever of the -genemap
# and -omimtext arguments were supplied with a true value.
sub new {
    my ( $class, @args ) = @_;

    my $self = $class->SUPER::new( @args );

    my ( $genemap_path, $omimtxt_path )
        = $self->_rearrange( [ qw( GENEMAP OMIMTEXT ) ], @args );

    $self->init();

    if ( $genemap_path ) {
        $self->genemap_file_name( $genemap_path );
    }

    if ( $omimtxt_path ) {
        $self->omimtxt_file_name( $omimtxt_path );
    }

    return $self;
}
246 =head2 next_phenotype
248 Title : next_phenotype()
249 Usage : while ( my $omim_entry = $omim_parser->next_phenotype() ) {
250 # do something with $omim_entry
252 Function: Returns a Bio::Phenotype::OMIM::OMIMentry or
253 undef once the end of the omim text file is reached.
254 Returns : A Bio::Phenotype::OMIM::OMIMentry.
255 Args :
257 =cut
# Reads the OMIM text file from the current position up to the next
# *RECORD* marker (or the end of the file) and returns the fields collected
# on the way as a Bio::Phenotype::OMIM::OMIMentry.
#
# Returns : an OMIMentry, or nothing once the end of the file was reached
#           on a previous call.
# Throws  : if no OMIM text file has been set, or if the end of the file
#           is reached without a single field having been collected.
sub next_phenotype {
    my ( $self ) = @_;

    unless ( defined( $self->_OMIM_text_file() ) ) {
        $self->_no_OMIM_text_file_provided_error();
    }

    if ( $self->_done() == TRUE ) {
        return;
    }

    # Field tags that always map to the same state and that end any
    # MINI MIM section.
    my %state_for = (
        "NO" => MIM_NUMBER_STATE,
        "TI" => TITLE_STATE,
        "TX" => TEXT_STATE,
        "AV" => ALLELIC_VARIANT_STATE,
        "SA" => SEE_ALSO_STATE,
        "RF" => REF_STATE,
        "CS" => SYMPT_STATE,
    );

    # Field tags that belong to the MINI MIM entry when they follow an
    # "MN" field: [ regular state, MINI MIM state ].
    my %mini_mim_aware_state_for = (
        "CN" => [ CONTRIBUTORS_STATE, MINI_MIM_CONTRIBUTORS_STATE ],
        "CD" => [ CREATED_BY_STATE,   MINI_MIM_CREATED_BY_STATE ],
        "ED" => [ EDITED_BY_STATE,    MINI_MIM_EDITED_BY_STATE ],
    );

    my $contents          = "";
    my $state             = DEFAULT_STATE;
    my $saw_mini_min_flag = FALSE;
    my %record            = ();

    # Test for definedness so that a line consisting of "0" cannot end
    # the loop prematurely.
    while ( defined( my $line = $self->_OMIM_text_file()->_readline() ) ) {
        if ( $line =~ /^\s*\*RECORD\*/ ) {
            if ( $self->_is_not_first_record() == TRUE ) {
                # A new record starts: flush the pending field and hand
                # back the record that just ended.
                $self->_add_to_hash( $state, $contents, \%record );
                my $omim_entry = $self->_createOMIMentry( \%record );
                return $omim_entry;
            }
            else {
                $self->_is_not_first_record( TRUE );
            }
        }
        elsif ( $line =~ /^\s*\*FIELD\*\s*(\S+)/ ) {
            my $fieldtag = $1;

            # Store the text of the field that just ended.
            if ( $state != DEFAULT_STATE ) {
                $self->_add_to_hash( $state, $contents, \%record );
            }
            $contents = "";

            if ( $fieldtag eq "MN" ) {
                $state             = MINI_MIM_TEXT_STATE;
                $saw_mini_min_flag = TRUE;
            }
            elsif ( exists $state_for{ $fieldtag } ) {
                $state             = $state_for{ $fieldtag };
                $saw_mini_min_flag = FALSE;
            }
            elsif ( exists $mini_mim_aware_state_for{ $fieldtag } ) {
                my ( $regular, $mini_mim )
                    = @{ $mini_mim_aware_state_for{ $fieldtag } };
                $state = ( $saw_mini_min_flag == TRUE ) ? $mini_mim : $regular;
            }
            else {
                # Report through the object's warn() instead of printing
                # to STDOUT, and drop back to the default state so that
                # the text of the unknown field is not appended to the
                # field that preceded it.
                $self->warn( "Unknown tag: $fieldtag" );
                $state = DEFAULT_STATE;
            }
        }
        else {
            $contents .= $line;
        }
    }

    $self->_OMIM_text_file()->close();
    $self->_done( TRUE );

    unless ( %record ) {
        $self->_not_a_OMIM_text_file_error();
    }

    $self->_add_to_hash( $state, $contents, \%record );

    my $omim_entry = $self->_createOMIMentry( \%record );

    return $omim_entry;

} # next_phenotype
380 =head2 init
382 Title : init()
383 Usage : $omim_parser->init();
384 Function: Initializes this OMIMparser to all "".
385 Returns :
386 Args :
388 =cut
# Puts the parser back into its starting condition: both file names are
# set to "", the genemap hash is replaced by an empty hash and the
# "first record seen" and "done" flags are set to FALSE.
sub init {
    my $self = shift;

    $self->$_( "" ) for qw( genemap_file_name omimtxt_file_name );

    $self->_genemap_hash( {} );
    $self->_OMIM_text_file( undef );    # undef is ignored by the accessor

    $self->_is_not_first_record( FALSE );
    $self->_done( FALSE );

} # init
405 =head2 genemap_file_name
407 Title : genemap_file_name
408 Usage : $omimparser->genemap_file_name( "genemap" );
409 Function: Set/get for the genemap file name.
410 Returns : The genemap file name [string].
411 Args : The genemap file name [string] (optional).
413 =cut
# Accessor for the genemap file name. When a defined value is passed, the
# name is stored and the file is read immediately via _read_genemap(),
# whose result becomes the genemap hash.
sub genemap_file_name {
    my ( $self, $value ) = @_;

    return $self->{ "_genemap_file_name" } unless defined $value;

    $self->{ "_genemap_file_name" } = $value;

    my $parsed_genemap = $self->_read_genemap( $value );
    $self->_genemap_hash( $parsed_genemap );

    return $self->{ "_genemap_file_name" };
} # genemap_file_name
429 =head2 omimtxt_file_name
431 Title : omimtxt_file_name
432 Usage : $omimparser->omimtxt_file_name( "omim.txt" );
433 Function: Set/get for the omim text file name.
434 Returns : The omim text file name [string].
435 Args : The omim text file name [string] (optional).
437 =cut
# Accessor for the OMIM text file name. When a defined value is passed,
# the name is stored and, unless it is the empty string, the file is
# opened as a Bio::Root::IO object for next_phenotype() to read from.
#
# Args    : the OMIM text file name [string] (optional).
# Returns : the stored OMIM text file name [string].
sub omimtxt_file_name {
    my ( $self, $value ) = @_;

    if ( defined $value ) {
        $self->{ "_omimtxt_file_name" } = $value;

        # Only the empty string (as passed by init()) is skipped. Any
        # other name is opened, including names made up of word
        # characters only, such as "omimtxt".
        if ( length $value ) {
            $self->_OMIM_text_file( Bio::Root::IO->new( -file => $value ) );
        }
    }

    return $self->{ "_omimtxt_file_name" };
} # omimtxt_file_name
# Turns one collected record (a hash keyed by parser state, holding the
# raw text of each field) into a Bio::Phenotype::OMIM::OMIMentry with an
# attached MiniMIMentry. Every text is stripped of leading and trailing
# whitespace before it is interpreted.
sub _createOMIMentry {
    my ( $self, $record_ref ) = @_;

    my $omim_entry = Bio::Phenotype::OMIM::OMIMentry->new();
    my $mini_mim   = Bio::Phenotype::OMIM::MiniMIMentry->new();

    foreach my $state ( keys %{ $record_ref } ) {
        my $text = $record_ref->{ $state };

        $text =~ s/^\s+//;
        $text =~ s/\s+$//;

        if ( $state == MIM_NUMBER_STATE ) {
            # Keep the digits only (this removes whitespace as well).
            $text =~ s/\D//g;

            $omim_entry->MIM_number( $text );

            # Add the genemap data when there is a line for this number.
            my $genemap = $self->_genemap_hash();
            if ( exists( $genemap->{ $text } ) ) {
                $self->_parse_genemap( $omim_entry, $text );
            }
        }
        elsif ( $state == TITLE_STATE ) {
            my ( $title, $alt_titles ) = $self->_parse_title( $text );
            $omim_entry->title( $title );
            $omim_entry->alternative_titles_and_symbols( $alt_titles );

            # The first character of the title carries a flag.
            if ( $title =~ /^\*/ ) {
                $omim_entry->is_separate( TRUE );
            }
            elsif ( $title =~ /^#/ ) {
                $omim_entry->more_than_two_genes( TRUE );
            }
        }
        elsif ( $state == TEXT_STATE ) {
            $omim_entry->description(
                $text =~ /DESCRIPTION1\nDESCRIPTION2/ ? undef : $text );
        }
        elsif ( $state == ALLELIC_VARIANT_STATE ) {
            my @variants = $self->_parse_allelic_variants( $text );
            $omim_entry->add_AllelicVariants( @variants );
        }
        elsif ( $state == SEE_ALSO_STATE ) {
            $omim_entry->additional_references( $text );
        }
        elsif ( $state == REF_STATE ) {
            my @references = $self->_parse_references( $text );
            $omim_entry->add_References( @references );
        }
        elsif ( $state == SYMPT_STATE ) {
            $omim_entry->clinical_symptoms_raw(
                $text eq 'clinical symptoms' ? '' : $text );
        }
        elsif ( $state == CONTRIBUTORS_STATE ) {
            $omim_entry->contributors(
                $text =~ /cn1\ncn2\ncn3/ ? undef : $text );
        }
        elsif ( $state == CREATED_BY_STATE ) {
            $omim_entry->created(
                $text =~ /cd1\ncd2\ncd3/ ? undef : $text );
        }
        elsif ( $state == EDITED_BY_STATE ) {
            $omim_entry->edited(
                $text =~ /ed1\ned2\ned3/ ? undef : $text );
        }
        elsif ( $state == MINI_MIM_TEXT_STATE ) {
            $mini_mim->description( $text );
        }
        elsif ( $state == MINI_MIM_CONTRIBUTORS_STATE ) {
            $mini_mim->contributors( $text );
        }
        elsif ( $state == MINI_MIM_CREATED_BY_STATE ) {
            $mini_mim->created( $text );
        }
        elsif ( $state == MINI_MIM_EDITED_BY_STATE ) {
            $mini_mim->edited( $text );
        }
    }

    # Every entry is tagged with a human species object.
    my $man = Bio::Species->new();
    $man->classification( qw( sapiens Homo ) );
    $man->common_name( "man" );
    $omim_entry->species( $man );
    $omim_entry->miniMIM( $mini_mim );

    # Break the raw symptoms text down into per-section symptoms.
    $self->_finer_parse_symptoms( $omim_entry );

    return $omim_entry;

} # _createOMIMentry
# Splits the raw clinical symptoms text of an entry into individual
# symptoms. A line of the form "Heading:" opens a section; each following
# indented line (an optional trailing ";" is dropped) is added to the entry
# as a symptom of that section. Indented lines seen before any heading are
# skipped. The raw text of the entry is set to '' afterwards.
sub _finer_parse_symptoms {
    my ( $self, $omim_entry ) = @_;

    my $raw_text = $omim_entry->clinical_symptoms_raw;

    if ( $raw_text ) {
        my $section;

        ROW:
        foreach my $row ( split /\n/, $raw_text ) {
            if ( $row =~ /^([\w\s,]+)\:\s*$/ ) {
                $section = $1;
                next ROW;
            }

            # Whitespace-only rows carry no information.
            next ROW if $row =~ /^\s+$/;

            next ROW unless $row =~ /^(\s+)([^;]+)\;?\s*$/;
            my $symptom = $2;

            # A symptom without a section heading cannot be filed.
            next ROW if !$section;

            $omim_entry->add_clinical_symptoms( $section, $symptom );
        }
    }

    $omim_entry->clinical_symptoms_raw('');
}
# Copies the data of one genemap line (looked up by MIM number and split
# on "|") into the given OMIM entry: cytogenetic positions (field 4), gene
# symbols (field 5), mouse correlates (field 16), gene status (field 6),
# mapping method (field 10) and comment (field 11).
sub _parse_genemap {
    my ( $self, $omim_entry, $val ) = @_;

    my @fields = split( /\|/, $self->_genemap_hash()->{ $val } );

    my $locations = $fields[ 4 ];
    if ( defined $locations ) {
        $locations =~ s/\s+//g;
        my @cyto_positions
            = map { Bio::Map::CytoPosition->new( -value => $_ ) }
              split( /[,;]/, $locations );
        $omim_entry->add_CytoPositions( @cyto_positions );
    }

    my $gene_symbols = $fields[ 5 ];
    if ( defined $gene_symbols ) {
        $gene_symbols =~ s/\s+//g;
        $omim_entry->add_gene_symbols( split( /[,;]/, $gene_symbols ) );
    }

    my $mouse_correlates = $fields[ 16 ];
    if ( defined $mouse_correlates ) {
        $mouse_correlates =~ s/\s+//g;

        my @correlates;
        foreach my $name ( split( /[,;]/, $mouse_correlates ) ) {
            # Each correlate gets its own mouse species object.
            my $mouse = Bio::Species->new();
            $mouse->classification( qw( musculus Mus ) );
            $mouse->common_name( "mouse" );

            my $correlate = Bio::Phenotype::Correlate->new();
            $correlate->name( $name );
            $correlate->species( $mouse );
            $correlate->type( "OMIM mouse correlate" );

            push( @correlates, $correlate );
        }
        $omim_entry->add_Correlates( @correlates );
    }

    if ( defined $fields[ 6 ] ) {
        $omim_entry->gene_status( $fields[ 6 ] );
    }
    if ( defined $fields[ 10 ] ) {
        $omim_entry->mapping_method( $fields[ 10 ] );
    }
    if ( defined $fields[ 11 ] ) {
        $omim_entry->comment( $fields[ 11 ] );
    }

} # _parse_genemap
# Splits the text of an allelic variants field into individual variants
# and returns them as a list of objects built by _create_allelic_variant().
#
# A line starting with a number such as ".0001" opens a new variant. The
# first line after it is the title; the last line seen before the first
# empty line becomes the symbol/mutation line; every non-empty line after
# the first empty line is part of the description.
sub _parse_allelic_variants {
    my ( $self, $text ) = @_;

    my @variants;

    my $number          = "";
    my $title           = "";
    my $symbol_mut_line = "";
    my $pending_line    = "";
    my $description     = "";
    my $saw_empty_line  = FALSE;

    foreach my $line ( split( /\n/, $text ) ) {

        # Lines without any word character count as empty.
        if ( $line !~ /\w/ ) {
            $saw_empty_line = TRUE;
            next;
        }

        if ( $line =~ /^\s*(\.\d+)/ ) {
            my $next_number = $1;

            # Finish the variant collected so far, if there is one.
            if ( $number ne "" ) {
                push( @variants,
                      $self->_create_allelic_variant( $number, $title,
                                                      $symbol_mut_line,
                                                      $description ) );
            }

            $number          = $next_number;
            $title           = "";
            $pending_line    = "";
            $symbol_mut_line = "";
            $description     = "";
            $saw_empty_line  = FALSE;
            next;
        }

        if ( $title eq "" ) {
            $title = $line;
            next;
        }

        if ( $saw_empty_line == FALSE ) {
            $pending_line = $line;
            next;
        }

        # Past the first empty line: promote the pending line and collect
        # the description.
        if ( $pending_line ne "" ) {
            $symbol_mut_line = $pending_line;
            $pending_line    = "";
        }
        $description = ( $description ne "" )
                     ? $description . "\n" . $line
                     : $line;
    }

    # The variant still being collected when the text ends.
    push( @variants,
          $self->_create_allelic_variant( $number, $title,
                                          $symbol_mut_line, $description ) );

    return @variants;

} # _parse_allelic_variants
# Builds one OMIMentryAllelicVariant. The symbol/mutation line is read as
# "SYMBOL, LYS123ARG" (symbol, original residue, position, mutated
# residue) when it has that shape, otherwise as "SYMBOL, free text"
# (symbol plus additional mutations), otherwise as a bare symbol.
sub _create_allelic_variant {
    my ( $self, $number, $title, $symbol_mut_line, $description ) = @_;

    my ( $symbol, $mutation, $aa_ori, $aa_mut, $position )
        = ( "", "", "", "", "" );

    if ( $symbol_mut_line =~ /\s*(.+?)\s*,\s*([a-z]{3})(\d+)([a-z]{3})/i ) {
        ( $symbol, $aa_ori, $position, $aa_mut ) = ( $1, $2, $3, $4 );
    }
    elsif ( $symbol_mut_line =~ /\s*(.+?)\s*,\s*(.+)/ ) {
        ( $symbol, $mutation ) = ( $1, $2 );
    }
    else {
        $symbol = $symbol_mut_line;
    }

    $self->throw("undef desc")     unless defined( $description );
    $self->throw("undef mutation") unless defined( $mutation );

    my $variant = Bio::Phenotype::OMIM::OMIMentryAllelicVariant->new();
    $variant->number( $number );
    $variant->aa_ori( $aa_ori );
    $variant->aa_mut( $aa_mut );
    $variant->position( $position );
    $variant->title( $title );
    $variant->symbol( $symbol );
    $variant->description( $description );
    $variant->additional_mutations( $mutation );

    return $variant;

} # _create_allelic_variant
# Splits the text of a title field into its first line (the title) and
# everything after that line (the alternative titles and symbols).
# Returns the two-element list ( title, remaining text ); the remaining
# text is "" when the field has a single line only.
sub _parse_title {
    my ( $self, $text ) = @_;

    # Cut the first line, including its newline, off the front.
    if ( $text =~ s/^(.+)\n// ) {
        my $first_line = $1;
        return ( $first_line, $text );
    }

    return ( $text, "" );
} # _parse_title
# Splits the text of a reference field into numbered references and
# returns one Bio::Annotation::Reference per reference. Whitespace runs
# inside a reference are collapsed to single spaces. A reference is read
# as "authors: title. journal volume: pages, year" where possible, then as
# "authors: title. location", and otherwise is stored whole as the title.
sub _parse_references {
    my ( $self, $text ) = @_;

    # Trim, then drop the number in front of the first reference.
    $text =~ s/\A\s+//;
    $text =~ s/\s+\z//;
    $text =~ s/\A\d+\.\s*//;

    my @references;

    # The remaining references start with "<empty line> <number>.".
    my @entries = split( /\s*\n\s*\n\s*\d+\.\s*/, $text );

    foreach my $entry ( @entries ) {

        my ( $authors, $title, $location ) = ( "", "", "" );

        $entry =~ s/\s+/ /g;

        if ( $entry =~ /(.+?)\s*:\s*(.+?[.?!])\s+(.+?)\s+(\S+?)\s*:\s*(\w?\d+.*)\s*,\s*(\d+)/ ) {
            ( $authors, $title ) = ( $1, $2 );
            my ( $journal, $volume, $fromto, $year ) = ( $3, $4, $5, $6 );

            my $from = "";
            my $to   = "";
            if ( $fromto =~ /(\d+)-+(\d+)/ ) {
                $from = $1;
                $to   = "-" . $2;
            }
            elsif ( $fromto =~ /\A(\w+)/ ) {
                $from = $1;
            }

            $location = "$journal $volume $from$to ($year)";
        }
        elsif ( $entry =~ /(.+?)\s*:\s*(.+?[.?!])\s*(.+?)\z/ ) {
            ( $authors, $title, $location ) = ( $1, $2, $3 );
        }
        else {
            $title = $entry;
        }

        push( @references,
              Bio::Annotation::Reference->new( -title    => $title,
                                               -location => $location,
                                               -authors  => $authors ) );
    }

    return @references;

} # _parse_references
# Accessor for the hash holding the genemap lines. A defined argument
# must be a plain hash reference (anything else throws) and replaces the
# stored hash. Returns the stored hash reference.
sub _genemap_hash {
    my ( $self, $value ) = @_;

    return $self->{ "_genemap_hash" } unless defined $value;

    if ( ref( $value ) ne "HASH" ) {
        $self->throw( "Argument to method \"_genemap_hash\" is not a reference to an Hash" );
    }

    $self->{ "_genemap_hash" } = $value;

    return $self->{ "_genemap_hash" };
} # _genemap_hash
# Accessor for the flag recording that the first *RECORD* marker has been
# passed. A defined argument must equal TRUE or FALSE (anything else
# throws) and replaces the stored flag. Returns the stored flag.
sub _is_not_first_record {
    my ( $self, $value ) = @_;

    return $self->{ "_not_first_record" } unless defined $value;

    if ( $value != FALSE && $value != TRUE ) {
        $self->throw( "Found [$value] where [" . TRUE
                      ." or " . FALSE . "] expected" );
    }

    $self->{ "_not_first_record" } = $value;

    return $self->{ "_not_first_record" };
} # _is_not_first_record
# Accessor for the flag recording that the OMIM text file has been read
# to its end. A defined argument must equal TRUE or FALSE (anything else
# throws) and replaces the stored flag. Returns the stored flag.
sub _done {
    my ( $self, $value ) = @_;

    return $self->{ "_done" } unless defined $value;

    if ( $value != FALSE && $value != TRUE ) {
        $self->throw( "Found [$value] where [" . TRUE
                      ." or " . FALSE . "] expected" );
    }

    $self->{ "_done" } = $value;

    return $self->{ "_done" };
} # _done
# Accessor for the open OMIM text file. A defined argument must be a
# Bio::Root::IO (anything else throws) and replaces the stored object;
# an undefined argument leaves the stored object untouched. Returns the
# stored object.
sub _OMIM_text_file {
    my ( $self, $value ) = @_;

    return $self->{ "_omimtxt_file" } unless defined $value;

    if ( !$value->isa( "Bio::Root::IO" ) ) {
        $self->throw( "[$value] is not a valid \"Bio::Root::IO\"" );
    }

    $self->{ "_omimtxt_file" } = $value;

    return $self->{ "_omimtxt_file" };
} # _OMIM_text_file
# Reads a genemap file into a hash that maps field 9 of each line (used
# elsewhere in this module as the MIM number) to the complete, unmodified
# line. The hash is stored as the genemap hash of this parser and a
# reference to it is returned.
#
# Args    : the genemap file name [string].
# Returns : a reference to the hash read from the file.
# Throws  : if a line does not split into exactly 18 "|"-separated fields.
sub _read_genemap {
    my ( $self, $genemap_file_name ) = @_;

    my %gm           = ();
    my $genemap_file = Bio::Root::IO->new( -file => $genemap_file_name );

    # Test for definedness so that a line consisting of "0" cannot end
    # the loop prematurely.
    while ( defined( my $line = $genemap_file->_readline() ) ) {
        my @fields = split( /\|/, $line );
        unless ( scalar( @fields ) == 18 ) {
            # Name the file that is actually being read.
            $self->throw( "Gene map file \"" . $genemap_file_name
                          . "\" is not in the expected format."
                          . " Make sure there is a linebreak after the final line." );
        }
        $gm{ $fields[ 9 ] } = $line;
    }
    $genemap_file->close();

    $self->_genemap_hash( \%gm );

    # genemap_file_name() passes this value on to _genemap_hash().
    return \%gm;

} #_read_genemap
# Throws the error used when a phenotype is requested before an OMIM text
# file has been set; the message shows both ways of setting one.
sub _no_OMIM_text_file_provided_error {
    my ( $self ) = @_;

    my @message_lines = (
        "Need to indicate a OMIM text file to read from with",
        "either \"OMIMparser->new( -omimtext => \"path/to/omim.txt\" );\"",
        "or \"\$omim_parser->omimtxt_file_name( \"path/to/omim.txt\" );\"",
    );

    $self->throw( join( "\n", @message_lines ) );
} # _no_OMIM_text_file_provided_error
# Throws the error used when the OMIM text file was read to its end
# without yielding any field; the message names the stored file name.
sub _not_a_OMIM_text_file_error {
    my ( $self ) = @_;

    my $file_name = $self->omimtxt_file_name();

    $self->throw( "File \"$file_name\" appears not to be a OMIM text file" );
} # _not_a_OMIM_text_file_error
# Stores the text of a field in the record hash under its parser state.
# When the record already holds text for that state, the trailing newline
# of the stored text is removed and the new text is appended to it.
sub _add_to_hash {
    my ( $self, $state, $contents, $record_ref ) = @_;

    if ( !exists( $record_ref->{ $state } ) ) {
        $record_ref->{ $state } = $contents;
    }
    else {
        chomp( $record_ref->{ $state } );
        $record_ref->{ $state } .= $contents;
    }
} # _add_to_hash