added -online flag, -debug flag
[bioperl-db.git] / scripts / load_marshfield_map.pl
bloba938b8b782448f863a7b77aa8f5db9b997f0a07a
1 #!/usr/local/bin/perl -w
2 use strict;
4 use Bio::DB::Map::SQL::DBAdaptor;
5 use Bio::DB::Map::Marker;
6 use Bio::DB::Map::Map;
7 use Bio::SeqIO;
8 use IO::File;
9 use IO::String;
10 use Carp;
12 use LWP::UserAgent;
13 use HTTP::Request::Common;
15 BEGIN {
16 use vars qw($MARKERDATA $MARSHFIELDURL $NCBIDATAURL $STSDATA
17 $NCBI_STSURL $ONLINE $DEBUG);
18 $MARSHFIELDURL = 'http://marshfieldclinic.org/research/genetics/Map_Markers/data';
19 $MARKERDATA = '/tmp/markers/marshfield';
20 $STSDATA = '/tmp/markers/human.sts';
22 $NCBI_STSURL = 'ftp://ftp.ncbi.nlm.nih.gov/repository/dbSTS/human.sts';
23 $ONLINE = 0;
24 $DEBUG = 1;
26 #set proxy stuff here where applicable
27 my $ua = new LWP::UserAgent();
29 use Getopt::Long;
31 my $host = 'localhost';
32 my $port = 3306;
33 my $dbname = 'markerdb';
34 my $dbuser = 'root';
35 my $dbpass = 'undef';
36 my $module = 'Bio::DB::Map::SQL::DBAdaptor';
38 &GetOptions(
39 'online' => \$ONLINE,
40 'debug' => \$DEBUG,
41 'host:s' => \$host,
42 'port:n' => \$port,
43 'db|dbname:s' => \$dbname,
44 'user|dbuser:s' => \$dbuser,
45 'p|dbpass:s' => \$dbpass,
46 'm|module:s' => \$module,
49 my %props = ( '-host' => $host,
50 '-dbname' => $dbname,
51 '-user' => $dbuser);
53 if( defined $dbpass ) {
54 $props{'-dbpass'} = $dbpass;
57 my $db = Bio::DB::Map::SQL::DBAdaptor->new( %props );
59 my $mapadaptor = $db->get_MapAdaptor();
61 my $markeradaptor = $db->get_MarkerAdaptor();
63 my @mapnames = qw(marshfield marshfield_female marshfield_male);
64 my %maps;
66 foreach my $name ( @mapnames ) {
67 $maps{$name} = $mapadaptor->get('-name' => $name);
70 my %sts;
71 my $STS = &get_sts_data();
72 exit if( ! $STS );
73 # build STS
74 while(<$STS>) {
75 my ($id,$fwd,$rev,$len,$locus,$chrom,$genbank) = split;
77 my $probe = '';
78 $locus =~ s/CHLC\.//;
80 $sts{$locus} = [ $fwd,$rev,$len ];
81 if( $genbank eq '-' ) {
82 # do nothing
83 } elsif( $genbank =~ /;/ ) {
84 foreach my $g ( split(/;/,$genbank) ) {
85 $sts{$g} = [ $fwd,$rev,$len ];
87 } else {
88 $sts{$genbank} = [ $fwd,$rev,$len ];
91 close($STS);
93 # read in the maps
94 my %markers;
95 foreach my $chrom ( 1..23 ) {
96 my $MAP = &get_map_data_for_chrom($chrom);
97 last if ( ! $MAP);
98 # skip the 1st 2 header lines
99 <$MAP>; <$MAP>;
100 while(<$MAP>) {
101 s/^\*//;
102 s/^x//;
103 my ($id,$probe,$locus,@mapvals) = split;
105 if( $id !~ /^\d+$/ ) {
106 # something is wrong with the format
107 die("got an unexpected line: $_");
109 $locus = '' unless ( $locus !~ /Unknown/i );
110 $markers{$probe} = new Bio::DB::Map::Marker ( -probe => $probe,
111 -locus => $locus,
112 -chrom => $chrom,
113 -type => 'msat');
115 foreach my $name ( @mapnames ) {
116 $markers{$probe}->add_position(shift @mapvals, $name);
119 <$MAP>; # skip the next line because it is just
120 # the distance between markers
122 close($MAP);
124 my %info;
125 my $INFO = &get_info_data_for_chrom($chrom);
126 last if ( ! $INFO);
127 <$INFO>; <$INFO>;
128 my (@requests,@requests_probes);
129 while(<$INFO>) {
130 s/^\*//;
131 s/^x//;
133 my ($probe,$locus,$genbank) = split;
135 if( $locus =~ /Unknown/i ) {
136 $locus = $probe;
139 if( $genbank =~ /Unknown/i ) {
140 $genbank = $locus;
143 # I checked an one locus appears twice
144 # and these are actually the same marker
145 # M758B6-1/M758B621
146 my $stsval = undef;
147 my $marker = undef;
148 if( $sts{$genbank} ) {
149 $stsval = $sts{$genbank};
150 } elsif( $sts{$locus} ) {
151 $stsval = $sts{$locus};
152 } elsif( $sts{$probe} ) {
153 $stsval = $sts{$probe};
156 if( ! $stsval ) {
157 print "could not find stsval for $probe $locus $genbank\n" if($DEBUG);
158 next; }
160 if( $markers{$probe} ) {
161 $marker = $markers{$probe};
162 } elsif( $markers{$locus} ) {
163 $marker = $markers{$locus};
164 } elsif( $markers{$genbank} ) {
165 $marker = $markers{$genbank};
167 if( ! $marker ) {
168 print "unable to find marker for $probe $locus $genbank\n" if($DEBUG);
169 next;
172 $marker->pcrfwd($stsval->[0]);
173 $marker->pcrrev($stsval->[1]);
174 $marker->length($stsval->[2]);
175 $markers{$marker->probe} = $marker;
177 close($INFO);
180 my ($count,$total,$duplicate) = (0,0,0);
181 foreach my $marker ( values %markers ) {
182 $total++;
183 if( ! $marker->pcrfwd ) {
184 if( $DEBUG ) {
185 $markeradaptor->warn("no pcr info, skipping \n".
186 $marker->to_string);
188 $count++;
189 next;
191 if( ! $markeradaptor->write($marker) ) {
192 $duplicate++;
193 if( ! $markeradaptor->add_duplicate_marker($marker) ) {
194 print STDERR "no duplicate marker found for ", $marker->to_string(), "\n";
198 print "No primers for $count, $duplicate duplicates, out of $total\n";
200 sub get_map_data_for_chrom {
201 my ($chrom) = @_;
202 my $fh;
203 if( $ONLINE ) {
204 my $url = sprintf('%s/maps/map%d.txt', $MARSHFIELDURL,$chrom);
205 my $request = GET $url;
206 my $response = $ua->request($request);
207 if( $response->is_success ) {
208 $fh = new IO::String($response->content);
209 } else {
210 warn(sprintf"Error: Request was %s error was %s",
211 $request->as_string(),
212 $response->error_as_HTML);
213 $fh = undef;
215 } else {
216 my $file = sprintf('< %s/maps/map%d.txt', $MARKERDATA,
217 $chrom);
218 $fh = new IO::File($file) or do {
219 warn("cannot open $file");
220 $fh = undef; };
222 return $fh;
225 sub get_info_data_for_chrom {
226 my ($chrom) = @_;
227 my $fh;
228 if( $ONLINE ) {
229 my $url = sprintf('%s/info/info%d.txt', $MARSHFIELDURL,$chrom);
230 my $request = GET $url;
231 my $response = $ua->request($request);
232 if( $response->is_success ) {
233 $fh = new IO::String($response->content);
234 } else {
235 warn(sprintf"Error: Request was %s error was %s",
236 $request->as_string(),
237 $response->error_as_HTML);
238 $fh = undef;
240 } else {
241 my $file = sprintf('< %s/info/info%d.txt', $MARKERDATA,
242 $chrom);
243 $fh = new IO::File($file) or do {
244 warn("cannot open $file");
245 $fh = undef; };
247 return $fh;
250 sub get_sts_data {
251 my $fh;
252 if( $ONLINE ) {
253 my $request = GET $NCBI_STSURL;
254 my $response = $ua->request($request);
255 if( $response->is_success ) {
256 $fh = new IO::String($response->content);
257 } else {
258 warn(sprintf"Error: Request was %s error was %s",
259 $request->as_string(),
260 $response->error_as_HTML);
261 $fh = undef;
263 } else {
264 $fh = new IO::File("< $STSDATA") or do {
265 warn("cannot open $STSDATA");
266 $fh = undef;
269 return $fh;