1 #!/usr/local/bin/perl -w
4 use Bio
::DB
::Map
::SQL
::DBAdaptor
;
5 use Bio
::DB
::Map
::Marker
;
13 use HTTP
::Request
::Common
;
16 use vars
qw($MARKERDATA $MARSHFIELDURL $NCBIDATAURL $STSDATA
17 $NCBI_STSURL $ONLINE $DEBUG);
18 $MARSHFIELDURL = 'http://marshfieldclinic.org/research/genetics/Map_Markers/data';
19 $MARKERDATA = '/tmp/markers/marshfield';
20 $STSDATA = '/tmp/markers/human.sts';
22 $NCBI_STSURL = 'ftp://ftp.ncbi.nlm.nih.gov/repository/dbSTS/human.sts';
26 #set proxy stuff here where applicable
27 my $ua = new LWP::UserAgent();
31 my $host = 'localhost';
33 my $dbname = 'markerdb';
36 my $module = 'Bio::DB::Map::SQL::DBAdaptor';
43 'db|dbname:s' => \$dbname,
44 'user|dbuser:s' => \$dbuser,
45 'p|dbpass:s' => \$dbpass,
46 'm|module:s' => \$module,
49 my %props = ( '-host' => $host,
53 if( defined $dbpass ) {
54 $props{'-dbpass'} = $dbpass;
57 my $db = Bio::DB::Map::SQL::DBAdaptor->new( %props );
59 my $mapadaptor = $db->get_MapAdaptor();
61 my $markeradaptor = $db->get_MarkerAdaptor();
63 my @mapnames = qw(marshfield marshfield_female marshfield_male);
66 foreach my $name ( @mapnames ) {
67 $maps{$name} = $mapadaptor->get('-name' => $name);
71 my $STS = &get_sts_data
();
75 my ($id,$fwd,$rev,$len,$locus,$chrom,$genbank) = split;
80 $sts{$locus} = [ $fwd,$rev,$len ];
81 if( $genbank eq '-' ) {
83 } elsif( $genbank =~ /;/ ) {
84 foreach my $g ( split(/;/,$genbank) ) {
85 $sts{$g} = [ $fwd,$rev,$len ];
88 $sts{$genbank} = [ $fwd,$rev,$len ];
95 foreach my $chrom ( 1..23 ) {
96 my $MAP = &get_map_data_for_chrom
($chrom);
98 # skip the 1st 2 header lines
103 my ($id,$probe,$locus,@mapvals) = split;
105 if( $id !~ /^\d+$/ ) {
106 # something is wrong with the format
107 die("got an unexpected line: $_");
109 $locus = '' unless ( $locus !~ /Unknown/i );
110 $markers{$probe} = new Bio
::DB
::Map
::Marker
( -probe
=> $probe,
115 foreach my $name ( @mapnames ) {
116 $markers{$probe}->add_position(shift @mapvals, $name);
119 <$MAP>; # skip the next line because it is just
120 # the distance between markers
125 my $INFO = &get_info_data_for_chrom
($chrom);
128 my (@requests,@requests_probes);
133 my ($probe,$locus,$genbank) = split;
135 if( $locus =~ /Unknown/i ) {
139 if( $genbank =~ /Unknown/i ) {
143 # I checked an one locus appears twice
144 # and these are actually the same marker
148 if( $sts{$genbank} ) {
149 $stsval = $sts{$genbank};
150 } elsif( $sts{$locus} ) {
151 $stsval = $sts{$locus};
152 } elsif( $sts{$probe} ) {
153 $stsval = $sts{$probe};
157 print "could not find stsval for $probe $locus $genbank\n" if($DEBUG);
160 if( $markers{$probe} ) {
161 $marker = $markers{$probe};
162 } elsif( $markers{$locus} ) {
163 $marker = $markers{$locus};
164 } elsif( $markers{$genbank} ) {
165 $marker = $markers{$genbank};
168 print "unable to find marker for $probe $locus $genbank\n" if($DEBUG);
172 $marker->pcrfwd($stsval->[0]);
173 $marker->pcrrev($stsval->[1]);
174 $marker->length($stsval->[2]);
175 $markers{$marker->probe} = $marker;
180 my ($count,$total,$duplicate) = (0,0,0);
181 foreach my $marker ( values %markers ) {
183 if( ! $marker->pcrfwd ) {
185 $markeradaptor->warn("no pcr info, skipping \n".
191 if( ! $markeradaptor->write($marker) ) {
193 if( ! $markeradaptor->add_duplicate_marker($marker) ) {
194 print STDERR
"no duplicate marker found for ", $marker->to_string(), "\n";
198 print "No primers for $count, $duplicate duplicates, out of $total\n";
200 sub get_map_data_for_chrom
{
204 my $url = sprintf('%s/maps/map%d.txt', $MARSHFIELDURL,$chrom);
205 my $request = GET
$url;
206 my $response = $ua->request($request);
207 if( $response->is_success ) {
208 $fh = new IO
::String
($response->content);
210 warn(sprintf"Error: Request was %s error was %s",
211 $request->as_string(),
212 $response->error_as_HTML);
216 my $file = sprintf('< %s/maps/map%d.txt', $MARKERDATA,
218 $fh = new IO
::File
($file) or do {
219 warn("cannot open $file");
225 sub get_info_data_for_chrom
{
229 my $url = sprintf('%s/info/info%d.txt', $MARSHFIELDURL,$chrom);
230 my $request = GET
$url;
231 my $response = $ua->request($request);
232 if( $response->is_success ) {
233 $fh = new IO
::String
($response->content);
235 warn(sprintf"Error: Request was %s error was %s",
236 $request->as_string(),
237 $response->error_as_HTML);
241 my $file = sprintf('< %s/info/info%d.txt', $MARKERDATA,
243 $fh = new IO
::File
($file) or do {
244 warn("cannot open $file");
253 my $request = GET
$NCBI_STSURL;
254 my $response = $ua->request($request);
255 if( $response->is_success ) {
256 $fh = new IO
::String
($response->content);
258 warn(sprintf"Error: Request was %s error was %s",
259 $request->as_string(),
260 $response->error_as_HTML);
264 $fh = new IO
::File
("< $STSDATA") or do {
265 warn("cannot open $STSDATA");