1 # fpc.pm,v 1.2.2.1 2005/10/09 15:16:27 jason Exp
3 # BioPerl module for Bio::MapIO::fpc
5 # Cared for by Gaurav Gupta <gaurav@genome.arizona.edu>
9 # You may distribute this module under the same terms as perl itself
11 # POD documentation - main docs before the code
15 Bio::MapIO::fpc - A FPC Map reader
19 # do not use this object directly it is accessed through the Bio::MapIO system
23 -format : specifies the format of the file format is "fpc",
24 -file : specifies the name of the .fpc file
25 -readcor : boolean argument, indicating if .cor is to be read
26 or not. It looks for the .cor file in the same path
28 0 : doesn't read .cor file
29 1 : reads the .cor file
31 -verbose : indicates the process of loading of fpc file
32 my $mapio = Bio::MapIO->new(-format => "fpc",
37 my $map = $mapio->next_map();
39 foreach my $marker ( $map->each_markerid() ) {
40 # loop through the markers associated with the map
41 # likewise for contigs, clones, etc.
47 This object contains code for parsing and processing FPC files and creating
48 L<Bio::Map::Physical> object from it.
50 For faster access and better optimization, the data is stored internally in
51 hashes. The corresponding objects are created on request.
53 We handle reading of the FPC ourselves, since MapIO module of Bioperl adds
58 # Let the code begin...
60 package Bio
::MapIO
::fpc
;
64 use Bio
::Map
::Physical
;
67 use Bio
::Map
::FPCMarker
;
70 use base
qw(Bio::MapIO);
79 Usage : called implicitly
80 Function: calls the SUPER::_initialize
82 Args : species, readcor
87 my ($self,@args) = @_;
89 $self->SUPER::_initialize
(@args);
90 ($species,$_readcor) = $self->_rearrange([qw(SPECIES READCOR)], @args);
91 $_readcor = 0 unless (defined($_readcor));
96 These methods let you get and set the member variables
101 Usage : my $fpcmap = $mapio->next_map();
102 Function: gets the fpcmap from MapIO
103 Returns : object of type L<Bio::Map::MapI>
113 my ($name,$fpcver,$moddate,$moduser,$contigcnt,$clonecnt,$markerscnt,
114 $bandcnt,$marker,$seqclone);
115 my ($corfile,$corindex,$BUFFER);
118 my ($contig, $contigNumber);
127 my $map = Bio
::Map
::Physical
->new('-units' => 'CB',
128 '-type' => 'physical');
130 my $filename = $self->file();
131 my $fh = $self->{'_filehandle'};
133 if (defined($_readcor)) {
134 $map->core_exists($_readcor);
137 $map->core_exists(0);
140 if ($map->core_exists()) {
141 $corfile = substr($filename,0,length($filename)-3)."cor";
142 if (open(CORE
,$corfile)) {
143 while(read(CORE
,$BUFFER,2)) {
144 push(@cordata,unpack('n*', $BUFFER));
148 $map->core_exists(0);
152 ## Read in the header
153 while (defined($line = <$fh>)) {
156 if ($line =~ m{^//\s+fpc\s+project\s+(.+)}) { $map->name($1); }
157 if ($line =~ m{^//\s+([\d.]+)}) {
159 $version =~ /((\d+)\.(\d+))(.*)/;
161 if ($line =~ /User:\s+(.+)/) { $map->modification_user($1); }
164 if ($line =~ m{^//\s+Framework\s+(\w+)\s+(\w+)\s+([-\w]+)\s+(\w+)\s+(\w+)\s+(.+)$})
166 $map->group_type($3) if ($2 eq "Label");
167 $map->group_abbr($5) if ($4 eq "Abbrev");
170 last unless ($line =~ m{^//});
173 if (!defined($map->group_type()) || !defined($map->group_abbr()) ) {
174 $map->group_type("Chromosome");
175 $map->group_abbr("Chr");
178 $_contigs{0}{'range'}{'end'} = 0;
179 $_contigs{0}{'range'}{'start'} = 0;
181 ## Read in the clone data
182 while (defined($line = <$fh>)) {
189 my (@amatch,@pmatch,@ematch);
193 last if ($line =~ /^Markerdata/);
196 $line =~ /^(\w+)\s+:\s+"(.+)"/;
198 ## these will be set if we did find the clone line
199 ($type, $name) = ($1, $2);
201 if ($name =~ /sd1/) {
205 $_clones{$name}{'type'} = $type;
206 $_clones{$name}{'contig'} = 0;
207 $_contigs{'0'}{'clones'}{$name} = 0;
211 ## Loop through the following lines, getting attributes for clone
212 while (defined($line = <$fh>) && $line !~ /^\s*\n$/) {
214 if ($line =~ /^Map "ctg(\d+)" Ends (Left|Right) ([-\d]+)/) {
215 $_clones{$name}{'contig'} = $1;
216 $_contigs{$1}{'clones'}{$name} = 0;
218 delete($_contigs{'0'}{'clones'}{$name});
223 $line =~ /^Map "ctg(\d+)" Ends (Left|Right) ([\d]+)/;
224 $_clones{$name}{'range'}{'start'} = $temp;
226 $_contigs{$contigNumber}{'range'}{'start'} = $temp
227 if (!exists($_contigs{$contigNumber}{'range'}{'start'})
228 || $_contigs{$contigNumber}{'range'}{'start'}
231 $_clones{$name}{'range'}{'end'} = $3;
233 $_contigs{$contigNumber}{'range'}{'end'} = $3
234 if (!exists($_contigs{$contigNumber}{'range'}{'end'})
235 || $_contigs{$contigNumber}{'range'}{'end'} < $3 );
238 elsif ($line =~ /^([a-zA-Z]+)_match_to_\w+\s+"(.+)"/) {
239 my $matchtype = "match" . lc(substr($1, 0, 1));
240 $_clones{$name}{$matchtype}{$2} = 0;
242 elsif ($line =~ /^Positive_(\w+)\s+"(.+)"/) {
243 $_clones{$name}{'markers'}{$2} = 0;
244 $_markers{$2}{'clones'}{$name} = 0;
245 $_markers{$2}{'type'} = $1;
246 $_markers{$2}{'contigs'}{$contigNumber} = 0;
247 $_contigs{$contigNumber}{'markers'}{$2} = 0;
249 elsif ($line =~ /^Bands\s+(\d+)\s+(\d+)/ && !$bandsread) {
254 if ($map->core_exists()) {
256 push(@numbands,$cordata[($1-1)+$i]);
259 $_clones{$name}{'bands'} = \
@numbands;
262 push(@numbands,$1,$2);
263 $_clones{$name}{'bands'} = \
@numbands;
265 if (exists($_contigs{0}{'clones'}{$name})) {
266 $_clones{$name}{'range'}{'start'} = $ctgzeropos;
267 $_clones{$name}{'range'}{'end'} = $ctgzeropos + $2;
268 $_contigs{0}{'range'}{'end'} = $ctgzeropos + $2;
272 elsif ($line =~ /^Gel_number\s+(.+)/) {
273 $_clones{$name}{'gel'} = $1;
275 elsif ($line =~ /^Remark\s+"(.+)"/) {
276 $_clones{$name}{'remark'} .= $1;
277 $_clones{$name}{'remark'} .= "\n";
278 if($seqclone == 1 ) {
279 if( $1 =~ /\,\s+Chr(\d+)\s+/){
280 $_clones{$name}{'group'} = $1;
284 elsif ($line =~ /^Fp_number\s+"(.+)"/) {
285 $_clones{$name}{'fp_number'} = $1;
287 elsif ($line =~ /^Shotgun\s+(\w+)\s+(\w+)/) {
288 $_clones{$name}{'sequence_type'} = $1;
289 $_clones{$name}{'sequence_status'} = $2;
291 elsif ($line =~ /^Fpc_remark\s+"(.+)"/) {
292 $_clones{$name}{'fpc_remark'} .= $1;
293 $_clones{$name}{'fpc_remark'} .= "\n";
298 print "Adding clone $curClone...\n\r"
299 if ($self->verbose() && $curClone % 1000 == 0);
302 $map->_setCloneRef(\
%_clones);
305 while (defined($line = <$fh>) && $line !~ /Contigdata/) {
308 last if ($line !~ /^Marker_(\w+)\s+:\s+"(.+)"/);
310 ($type, $name) = ($1, $2);
312 $_markers{$name}{'type'} = $type;
313 $_markers{$name}{'group'} = 0;
314 $_markers{$name}{'global'} = 0;
315 $_markers{$name}{'anchor'} = 0;
317 while (defined($line = <$fh>) && $line !~ /^\s*\n$/) {
318 if ($line =~ /^Global_position\s+([\d.]+)\s*(Frame)?/) {
319 my $position = $1 - floor
($1/1000)*1000;
320 $position = sprintf("%.2f",$position);
322 $_markers{$name}{'global'} = $position;
323 $_markers{$name}{'group'} = floor
($1/1000);
324 $_markers{$name}{'anchor'} = 1;
327 $_markers{$name}{'framework'} = 1;
330 $_markers{$name}{'framework'} = 0;
333 elsif ($line =~ /^Anchor_bin\s+"([\w\d.]+)"/) {
335 my $grptype = $map->group_type();
337 $grpmatch =~ /(\d+|\w)(.*)/;
339 my ($group,$subgroup);
343 $subgroup = substr($subgroup,1) if ($subgroup =~ /^\./);
345 $_markers{$name}{'group'} = $group;
346 $_markers{$name}{'subgroup'} = $subgroup;
348 elsif ($line =~ /^Anchor_pos\s+([\d.]+)\s+(F|P)?/){
349 $_markers{$name}{'global'} = $1;
350 $_markers{$name}{'anchor'} = 1;
353 $_markers{$name}{'framework'} = 1;
356 $_markers{$name}{'framework'} = 0;
359 elsif ($line =~ /^anchor$/) {
360 $_markers{$name}{'anchor'} = 1;
362 elsif ($line =~ /^Remark\s+"(.+)"/) {
363 $_markers{$name}{'remark'} .= $1;
364 $_markers{$name}{'remark'} .= "\n";
368 print "Adding Marker $curMarker...\n"
369 if ($self->verbose() && $curMarker % 1000 == 0);
372 $map->_setMarkerRef(\
%_markers);
375 my $grpabbr = $map->group_abbr();
378 $_contigs{0}{'group'} = 0;
380 while (defined($line = <$fh>)) {
382 if ($line =~ /^Ctg(\d+)/) {
384 $_contigs{$ctgname}{'group'} = 0;
385 $_contigs{$ctgname}{'anchor'} = 0;
386 $_contigs{$ctgname}{'position'} = 0;
388 if ($line =~ /#\w*(.*)\w*$/) {
389 $_contigs{$ctgname}{'remark'} = $1;
390 if ($line =~ /#\s+Chr(\d+)\s+/) {
391 $_contigs{$ctgname}{'group'} = $1;
392 $_contigs{$ctgname}{'anchor'} = 1;
396 elsif ($line =~ /^Chr_remark\s+"(-|\+|Chr(\d+))\s+(.+)"$/) {
398 $_contigs{$ctgname}{'anchor'} = 1;
399 $_contigs{$ctgname}{'chr_remark'} = $3 if(defined($3));
402 $_contigs{$ctgname}{'group'} = $2;
405 $_contigs{$ctgname}{'group'} = "?";
408 elsif ($line =~ /^User_remark\s+"(.+)"/) {
409 $_contigs{$ctgname}{'usr_remark'} = $1;
411 elsif ($line =~ /^Trace_remark\s+"(.+)"/) {
412 $_contigs{$ctgname}{'trace_remark'} = $1;
414 elsif ($grpabbr && $line =~ /^Chr_remark\s+"(\W|$grpabbr((\d+)|(\w+)|([.\w\d]+)))\s*(\{(.*)\}|\[(.*)\])?"\s+(Pos\s+((\d.)+|NaN))(NOEDIT)?/)
420 print "Warning: Nan encountered for Contig position \n";
422 $_contigs{$ctgname}{'chr_remark'} = $6;
423 $_contigs{$ctgname}{'position'} = $pos;
424 $_contigs{$ctgname}{'subgroup'} = 0;
426 if (defined($grpmatch)) {
427 $_contigs{$ctgname}{'anchor'} = 1;
429 if ($grpmatch =~ /((\d+)((\D\d.\d+)|(.\d+)))|((\w+)(\.\d+))/) {
431 my ($group,$subgroup);
432 $group = $2 if($grpabbr eq "Chr");
433 $subgroup = $3 if($grpabbr eq "Chr");
435 $group = $7 if($grpabbr eq "Lg");
436 $subgroup = $8 if($grpabbr eq "Lg");
438 $subgroup = substr($subgroup,1) if ($subgroup =~ /^\./);
439 $_contigs{$ctgname}{'group'} = $group;
440 $_contigs{$ctgname}{'subgroup'} = $subgroup;
444 $_contigs{$ctgname}{'group'} = $grpmatch;
448 $_contigs{$ctgname}{'anchor'} = 1;
449 $_contigs{$ctgname}{'group'} = "?";
453 print "Adding Contig $curContig...\n"
454 if ($self->verbose() && $curContig % 100 == 0);
457 $map->_setContigRef(\
%_contigs);
458 $map->_calc_markerposition();
459 $map->_calc_contigposition() if ($map->version() < 7.0);
460 $map->_calc_contiggroup() if ($map->version() == 4.6);
469 Usage : $mapio->write_map($map);
470 Function: Write a map out
472 Args : Bio::Map::MapI
477 my ($self,@args) = @_;
478 $self->throw_not_implemented();
487 User feedback is an integral part of the evolution of this and other
488 Bioperl modules. Send your comments and suggestions preferably to
489 the Bioperl mailing list. Your participation is much appreciated.
491 bioperl-l@bioperl.org - General discussion
492 http://bioperl.org/wiki/Mailing_lists - About the mailing lists
494 =head2 Reporting Bugs
496 Report bugs to the Bioperl bug tracking system to help us keep track
497 of the bugs and their resolution. Bug reports can be submitted via the
500 http://bugzilla.open-bio.org/
502 =head1 AUTHOR - Gaurav Gupta
504 Email gaurav@genome.arizona.edu
506 =head1 PROJECT LEADERS
508 Jamie Hatfield jamie@genome.arizona.edu
510 Dr. Cari Soderlund cari@genome.arizona.edu
512 =head1 PROJECT DESCRIPTION
514 The project was done in Arizona Genomics Computational Laboratory
515 (AGCoL) at University of Arizona.
517 This work was funded by USDA-IFAFS grant #11180 titled "Web Resources
518 for the Computation and Display of Physical Mapping Data".
520 For more information on this project, please refer:
521 http://www.genome.arizona.edu
525 The rest of the documentation details each of the object methods.
526 Internal methods are usually preceded with a _