bug 2549; fixed small bug in Bio::Taxon which doesn't catch -common_name
[bioperl-live.git] / Bio / MapIO / fpc.pm
blob21e154329fcf3903fb252fa20c80b9695cfa50d0
1 # fpc.pm,v 1.2.2.1 2005/10/09 15:16:27 jason Exp
3 # BioPerl module for Bio::MapIO::fpc
5 # Cared for by Gaurav Gupta <gaurav@genome.arizona.edu>
7 # Copyright AGCoL
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
13 =head1 NAME
15 Bio::MapIO::fpc - A FPC Map reader
17 =head1 SYNOPSIS
19 # do not use this object directly it is accessed through the Bio::MapIO system
21 use Bio::MapIO;
23 -format : specifies the format of the file format is "fpc",
24 -file : specifies the name of the .fpc file
25 -readcor : boolean argument, indicating if .cor is to be read
26 or not. It looks for the .cor file in the same path
27 as .fpc file.
28 0 : doesn't read .cor file
29 1 : reads the .cor file
30 [default 0]
31 -verbose : indicates the process of loading of fpc file
32 my $mapio = Bio::MapIO->new(-format => "fpc",
33 -file => "rice.fpc",
34 -readcor => 0,
35 -verbose => 0);
37 my $map = $mapio->next_map();
39 foreach my $marker ( $map->each_markerid() ) {
40 # loop through the markers associated with the map
41 # likewise for contigs, clones, etc.
45 =head1 DESCRIPTION
47 This object contains code for parsing and processing FPC files and creating
48 L<Bio::Map::Physical> object from it.
50 For faster access and better optimization, the data is stored internally in
51 hashes. The corresponding objects are created on request.
53 We handle reading of the FPC ourselves, since MapIO module of Bioperl adds
54 too much overhead.
56 =cut
58 # Let the code begin...
60 package Bio::MapIO::fpc;
61 use strict;
62 use POSIX;
64 use Bio::Map::Physical;
65 use Bio::Map::Clone;
66 use Bio::Map::Contig;
67 use Bio::Map::FPCMarker;
68 use Bio::Range;
70 use base qw(Bio::MapIO);
72 my $_readcor;
74 =head1 Initializer
76 =head2 _initialize
78 Title : _initialize
79 Usage : called implicitly
80 Function: calls the SUPER::_initialize
81 Returns : nothing
82 Args : species, readcor
84 =cut
86 sub _initialize{
87 my ($self,@args) = @_;
88 my $species;
89 $self->SUPER::_initialize(@args);
90 ($species,$_readcor) = $self->_rearrange([qw(SPECIES READCOR)], @args);
91 $_readcor = 0 unless (defined($_readcor));
94 =head1 Access Methods
96 These methods let you get and set the member variables
98 =head2 next_map
100 Title : next_map
101 Usage : my $fpcmap = $mapio->next_map();
102 Function: gets the fpcmap from MapIO
103 Returns : object of type L<Bio::Map::MapI>
104 Args : none
106 =cut
108 sub next_map{
110 my ($self) = @_;
112 my $line;
113 my ($name,$fpcver,$moddate,$moduser,$contigcnt,$clonecnt,$markerscnt,
114 $bandcnt,$marker,$seqclone);
115 my ($corfile,$corindex,$BUFFER);
116 my @cordata;
117 my %fpcmarker;
118 my ($contig, $contigNumber);
119 my $curClone = 0;
120 my $curMarker = 0;
121 my $curContig = 0;
122 my %_clones;
123 my %_markers;
124 my %_contigs;
125 my $ctgzeropos = 1;
127 my $map = Bio::Map::Physical->new('-units' => 'CB',
128 '-type' => 'physical');
130 my $filename = $self->file();
131 my $fh = $self->{'_filehandle'};
133 if (defined($_readcor)) {
134 $map->core_exists($_readcor);
136 else {
137 $map->core_exists(0);
140 if ($map->core_exists()) {
141 $corfile = substr($filename,0,length($filename)-3)."cor";
142 if (open(CORE,$corfile)) {
143 while(read(CORE,$BUFFER,2)) {
144 push(@cordata,unpack('n*', $BUFFER));
147 else {
148 $map->core_exists(0);
152 ## Read in the header
153 while (defined($line = <$fh>)) {
154 chomp($line);
156 if ($line =~ m{^//\s+fpc\s+project\s+(.+)}) { $map->name($1); }
157 if ($line =~ m{^//\s+([\d.]+)}) {
158 my $version = $1;
159 $version =~ /((\d+)\.(\d+))(.*)/;
160 $map->version($1);
161 if ($line =~ /User:\s+(.+)/) { $map->modification_user($1); }
164 if ($line =~ m{^//\s+Framework\s+(\w+)\s+(\w+)\s+([-\w]+)\s+(\w+)\s+(\w+)\s+(.+)$})
166 $map->group_type($3) if ($2 eq "Label");
167 $map->group_abbr($5) if ($4 eq "Abbrev");
170 last unless ($line =~ m{^//});
173 if (!defined($map->group_type()) || !defined($map->group_abbr()) ) {
174 $map->group_type("Chromosome");
175 $map->group_abbr("Chr");
178 $_contigs{0}{'range'}{'end'} = 0;
179 $_contigs{0}{'range'}{'start'} = 0;
181 ## Read in the clone data
182 while (defined($line = <$fh>)) {
183 $marker = 0;
184 $contig = 0;
185 $seqclone = 0;
186 $contigNumber = 0;
188 my ($type,$name);
189 my (@amatch,@pmatch,@ematch);
191 my $bandsread = 0;
193 last if ($line =~ /^Markerdata/);
196 $line =~ /^(\w+)\s+:\s+"(.+)"/;
198 ## these will be set if we did find the clone line
199 ($type, $name) = ($1, $2);
201 if ($name =~ /sd1/) {
202 $seqclone = 1;
205 $_clones{$name}{'type'} = $type;
206 $_clones{$name}{'contig'} = 0;
207 $_contigs{'0'}{'clones'}{$name} = 0;
209 my $temp;
211 ## Loop through the following lines, getting attributes for clone
212 while (defined($line = <$fh>) && $line !~ /^\s*\n$/) {
214 if ($line =~ /^Map "ctg(\d+)" Ends (Left|Right) ([-\d]+)/) {
215 $_clones{$name}{'contig'} = $1;
216 $_contigs{$1}{'clones'}{$name} = 0;
218 delete($_contigs{'0'}{'clones'}{$name});
220 $temp = $3;
221 $contigNumber = $1;
222 $line = <$fh>;
223 $line =~ /^Map "ctg(\d+)" Ends (Left|Right) ([\d]+)/;
224 $_clones{$name}{'range'}{'start'} = $temp;
226 $_contigs{$contigNumber}{'range'}{'start'} = $temp
227 if (!exists($_contigs{$contigNumber}{'range'}{'start'})
228 || $_contigs{$contigNumber}{'range'}{'start'}
229 > $temp );
231 $_clones{$name}{'range'}{'end'} = $3;
233 $_contigs{$contigNumber}{'range'}{'end'} = $3
234 if (!exists($_contigs{$contigNumber}{'range'}{'end'})
235 || $_contigs{$contigNumber}{'range'}{'end'} < $3 );
238 elsif ($line =~ /^([a-zA-Z]+)_match_to_\w+\s+"(.+)"/) {
239 my $matchtype = "match" . lc(substr($1, 0, 1));
240 $_clones{$name}{$matchtype}{$2} = 0;
242 elsif ($line =~ /^Positive_(\w+)\s+"(.+)"/) {
243 $_clones{$name}{'markers'}{$2} = 0;
244 $_markers{$2}{'clones'}{$name} = 0;
245 $_markers{$2}{'type'} = $1;
246 $_markers{$2}{'contigs'}{$contigNumber} = 0;
247 $_contigs{$contigNumber}{'markers'}{$2} = 0;
249 elsif ($line =~ /^Bands\s+(\d+)\s+(\d+)/ && !$bandsread) {
250 my $i = 0;
251 my @numbands;
252 $bandsread = 1;
254 if ($map->core_exists()) {
255 while($i<$2){
256 push(@numbands,$cordata[($1-1)+$i]);
257 $i++;
259 $_clones{$name}{'bands'} = \@numbands;
261 else {
262 push(@numbands,$1,$2);
263 $_clones{$name}{'bands'} = \@numbands;
265 if (exists($_contigs{0}{'clones'}{$name})) {
266 $_clones{$name}{'range'}{'start'} = $ctgzeropos;
267 $_clones{$name}{'range'}{'end'} = $ctgzeropos + $2;
268 $_contigs{0}{'range'}{'end'} = $ctgzeropos + $2;
269 $ctgzeropos += $2;
272 elsif ($line =~ /^Gel_number\s+(.+)/) {
273 $_clones{$name}{'gel'} = $1;
275 elsif ($line =~ /^Remark\s+"(.+)"/) {
276 $_clones{$name}{'remark'} .= $1;
277 $_clones{$name}{'remark'} .= "\n";
278 if($seqclone == 1 ) {
279 if( $1 =~ /\,\s+Chr(\d+)\s+/){
280 $_clones{$name}{'group'} = $1;
284 elsif ($line =~ /^Fp_number\s+"(.+)"/) {
285 $_clones{$name}{'fp_number'} = $1;
287 elsif ($line =~ /^Shotgun\s+(\w+)\s+(\w+)/) {
288 $_clones{$name}{'sequence_type'} = $1;
289 $_clones{$name}{'sequence_status'} = $2;
291 elsif ($line =~ /^Fpc_remark\s+"(.+)"/) {
292 $_clones{$name}{'fpc_remark'} .= $1;
293 $_clones{$name}{'fpc_remark'} .= "\n";
297 $curClone++;
298 print "Adding clone $curClone...\n\r"
299 if ($self->verbose() && $curClone % 1000 == 0);
302 $map->_setCloneRef(\%_clones);
303 $line = <$fh>;
305 while (defined($line = <$fh>) && $line !~ /Contigdata/) {
306 my ($type,$name);
308 last if ($line !~ /^Marker_(\w+)\s+:\s+"(.+)"/);
310 ($type, $name) = ($1, $2);
312 $_markers{$name}{'type'} = $type;
313 $_markers{$name}{'group'} = 0;
314 $_markers{$name}{'global'} = 0;
315 $_markers{$name}{'anchor'} = 0;
317 while (defined($line = <$fh>) && $line !~ /^\s*\n$/) {
318 if ($line =~ /^Global_position\s+([\d.]+)\s*(Frame)?/) {
319 my $position = $1 - floor($1/1000)*1000;
320 $position = sprintf("%.2f",$position);
322 $_markers{$name}{'global'} = $position;
323 $_markers{$name}{'group'} = floor($1/1000);
324 $_markers{$name}{'anchor'} = 1;
326 if(defined($2)) {
327 $_markers{$name}{'framework'} = 1;
329 else {
330 $_markers{$name}{'framework'} = 0;
333 elsif ($line =~ /^Anchor_bin\s+"([\w\d.]+)"/) {
334 my $grpmatch = $1;
335 my $grptype = $map->group_type();
337 $grpmatch =~ /(\d+|\w)(.*)/;
339 my ($group,$subgroup);
340 $group = $1;
341 $subgroup = $2;
343 $subgroup = substr($subgroup,1) if ($subgroup =~ /^\./);
345 $_markers{$name}{'group'} = $group;
346 $_markers{$name}{'subgroup'} = $subgroup;
348 elsif ($line =~ /^Anchor_pos\s+([\d.]+)\s+(F|P)?/){
349 $_markers{$name}{'global'} = $1;
350 $_markers{$name}{'anchor'} = 1;
352 if ($2 eq 'F') {
353 $_markers{$name}{'framework'} = 1;
355 else {
356 $_markers{$name}{'framework'} = 0;
359 elsif ($line =~ /^anchor$/) {
360 $_markers{$name}{'anchor'} = 1;
362 elsif ($line =~ /^Remark\s+"(.+)"/) {
363 $_markers{$name}{'remark'} .= $1;
364 $_markers{$name}{'remark'} .= "\n";
367 $curMarker++;
368 print "Adding Marker $curMarker...\n"
369 if ($self->verbose() && $curMarker % 1000 == 0);
372 $map->_setMarkerRef(\%_markers);
374 my $ctgname;
375 my $grpabbr = $map->group_abbr();
376 my $chr_remark;
378 $_contigs{0}{'group'} = 0;
380 while (defined($line = <$fh>)) {
382 if ($line =~ /^Ctg(\d+)/) {
383 $ctgname = $1;
384 $_contigs{$ctgname}{'group'} = 0;
385 $_contigs{$ctgname}{'anchor'} = 0;
386 $_contigs{$ctgname}{'position'} = 0;
388 if ($line =~ /#\w*(.*)\w*$/) {
389 $_contigs{$ctgname}{'remark'} = $1;
390 if ($line =~ /#\s+Chr(\d+)\s+/) {
391 $_contigs{$ctgname}{'group'} = $1;
392 $_contigs{$ctgname}{'anchor'} = 1;
396 elsif ($line =~ /^Chr_remark\s+"(-|\+|Chr(\d+))\s+(.+)"$/) {
398 $_contigs{$ctgname}{'anchor'} = 1;
399 $_contigs{$ctgname}{'chr_remark'} = $3 if(defined($3));
401 if (defined($2)) {
402 $_contigs{$ctgname}{'group'} = $2;
404 else {
405 $_contigs{$ctgname}{'group'} = "?";
408 elsif ($line =~ /^User_remark\s+"(.+)"/) {
409 $_contigs{$ctgname}{'usr_remark'} = $1;
411 elsif ($line =~ /^Trace_remark\s+"(.+)"/) {
412 $_contigs{$ctgname}{'trace_remark'} = $1;
414 elsif ($grpabbr && $line =~ /^Chr_remark\s+"(\W|$grpabbr((\d+)|(\w+)|([.\w\d]+)))\s*(\{(.*)\}|\[(.*)\])?"\s+(Pos\s+((\d.)+|NaN))(NOEDIT)?/)
416 my $grpmatch = $2;
417 my $pos = $10;
418 if ($pos eq "NaN") {
419 $pos = 0;
420 print "Warning: Nan encountered for Contig position \n";
422 $_contigs{$ctgname}{'chr_remark'} = $6;
423 $_contigs{$ctgname}{'position'} = $pos;
424 $_contigs{$ctgname}{'subgroup'} = 0;
426 if (defined($grpmatch)) {
427 $_contigs{$ctgname}{'anchor'} = 1;
429 if ($grpmatch =~ /((\d+)((\D\d.\d+)|(.\d+)))|((\w+)(\.\d+))/) {
431 my ($group,$subgroup);
432 $group = $2 if($grpabbr eq "Chr");
433 $subgroup = $3 if($grpabbr eq "Chr");
435 $group = $7 if($grpabbr eq "Lg");
436 $subgroup = $8 if($grpabbr eq "Lg");
438 $subgroup = substr($subgroup,1) if ($subgroup =~ /^\./);
439 $_contigs{$ctgname}{'group'} = $group;
440 $_contigs{$ctgname}{'subgroup'} = $subgroup;
443 else {
444 $_contigs{$ctgname}{'group'} = $grpmatch;
447 else {
448 $_contigs{$ctgname}{'anchor'} = 1;
449 $_contigs{$ctgname}{'group'} = "?";
452 $curContig++;
453 print "Adding Contig $curContig...\n"
454 if ($self->verbose() && $curContig % 100 == 0);
457 $map->_setContigRef(\%_contigs);
458 $map->_calc_markerposition();
459 $map->_calc_contigposition() if ($map->version() < 7.0);
460 $map->_calc_contiggroup() if ($map->version() == 4.6);
462 return $map;
466 =head2 write_map
468 Title : write_map
469 Usage : $mapio->write_map($map);
470 Function: Write a map out
471 Returns : none
472 Args : Bio::Map::MapI
474 =cut
476 sub write_map{
477 my ($self,@args) = @_;
478 $self->throw_not_implemented();
483 =head1 FEEDBACK
485 =head2 Mailing Lists
487 User feedback is an integral part of the evolution of this and other
488 Bioperl modules. Send your comments and suggestions preferably to
489 the Bioperl mailing list. Your participation is much appreciated.
491 bioperl-l@bioperl.org - General discussion
492 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
494 =head2 Reporting Bugs
496 Report bugs to the Bioperl bug tracking system to help us keep track
497 of the bugs and their resolution. Bug reports can be submitted via the
498 web:
500 http://bugzilla.open-bio.org/
502 =head1 AUTHOR - Gaurav Gupta
504 Email gaurav@genome.arizona.edu
506 =head1 PROJECT LEADERS
508 Jamie Hatfield jamie@genome.arizona.edu
510 Dr. Cari Soderlund cari@genome.arizona.edu
512 =head1 PROJECT DESCRIPTION
514 The project was done in Arizona Genomics Computational Laboratory
515 (AGCoL) at University of Arizona.
517 This work was funded by USDA-IFAFS grant #11180 titled "Web Resources
518 for the Computation and Display of Physical Mapping Data".
520 For more information on this project, please refer:
521 http://www.genome.arizona.edu
523 =head1 APPENDIX
525 The rest of the documentation details each of the object methods.
526 Internal methods are usually preceded with a _
528 =cut