added -online flag, -debug flag
[bioperl-db.git] / scripts / load_genethon_data.pl
blobe123ca05b86e8b54af7b17a65cfa92f766e66c5c
1 #!/usr/local/bin/perl -w
2 use strict;
4 use Bio::DB::Map::SQL::DBAdaptor;
5 use Bio::DB::Map::Marker;
6 use Bio::DB::Map::Map;
7 use Bio::SeqIO;
8 use IO::File;
9 use IO::String;
10 use Carp;
12 use LWP::UserAgent;
13 use HTTP::Request::Common;
15 BEGIN {
16 use vars qw($MARKERDATA $GENETHONURL $GENETHONDATA
17 $ONLINE $DEBUG);
18 $GENETHONURL = 'ftp://ftp.genethon.fr/pub/Gmap/Nature-1995/data';
19 $GENETHONDATA = '/tmp/markers/genethon';
20 $ONLINE = 0;
21 $DEBUG = 1;
23 #set proxy stuff here where applicable
24 my $ua = new LWP::UserAgent();
26 use Getopt::Long;
28 my $host = 'localhost';
29 my $port = 3306;
30 my $dbname = 'markerdb';
31 my $dbuser = 'root';
32 my $dbpass = 'undef';
33 my $module = 'Bio::DB::Map::SQL::DBAdaptor';
35 &GetOptions(
36 'online' => \$ONLINE,
37 'debug' => \$DEBUG,
38 'host:s' => \$host,
39 'port:n' => \$port,
40 'db|dbname:s' => \$dbname,
41 'user|dbuser:s' => \$dbuser,
42 'p|dbpass:s' => \$dbpass,
43 'm|module:s' => \$module,
46 my %props = ( '-host' => $host,
47 '-dbname' => $dbname,
48 '-user' => $dbuser);
50 if( defined $dbpass ) {
51 $props{'-dbpass'} = $dbpass;
54 my $db = Bio::DB::Map::SQL::DBAdaptor->new( %props );
56 my $mapadaptor = $db->get_MapAdaptor();
58 my $markeradaptor = $db->get_MarkerAdaptor();
60 # read in the maps
61 my %markers;
62 foreach my $chrom ( 1..23 ) {
63 my ($DATA) = &get_genethon_data($chrom);
65 while(<$DATA>) {
66 s/\*//g;
67 my ($probe,$sexavg,$female,$male,$locus,$genbank,
68 $allelect,$heterozygosity,$fwd,$rev,$genotype,$minmax);
69 if( $chrom < 23 ) {
70 ($probe,$sexavg,$female,$male,$locus,$genbank,
71 $allelect,$heterozygosity,$fwd,$rev,$genotype,$minmax) = split;
72 } else {
73 ($probe,$sexavg,$locus,$genbank,
74 $allelect,$heterozygosity,$fwd,$rev,$genotype,$minmax) = split;
76 if( $sexavg !~ /^\d+(\.\d+)?$/ ) { next; } # I'm so bad, would rather skip this
77 # line that try and figure how to parse
78 # it correctly when map info is omitted.
80 my ($min,$max) = split('-',$minmax);
81 my $len = ($min + $max) / 2;
83 my $marker = new Bio::DB::Map::Marker( '-locus' => $locus,
84 '-probe' => $probe,
85 '-pcrfwd' => $fwd,
86 '-pcrrev' => $rev,
87 '-length' => $len,
88 '-chrom' => $chrom,
89 '-type' => 'msat');
90 if( $genbank ) {
91 $marker->add_alias($genbank, 'genbank');
93 $marker->add_position($sexavg, 'genethon');
94 $marker->add_position($male, 'genethon_male') if( $male);
95 $marker->add_position($female, 'genethon_female') if( $female);
96 $markers{$probe} = $marker;
99 my ($count,$total,$duplicate) = (0,0,0);
100 foreach my $marker ( values %markers ) {
101 $total++;
102 if( ! $marker->pcrfwd ) {
103 if( $DEBUG ) {
104 $markeradaptor->warn("no pcr info, skipping \n".
105 $marker->to_string);
107 $count++;
109 if( ! $markeradaptor->write($marker) ) {
110 $duplicate++;
111 $markeradaptor->add_duplicate_marker($marker);
115 print "No primers for $count, $duplicate duplicates, out of $total\n";
117 sub get_genethon_data {
118 my ($chrom) = @_;
119 $chrom =~ s/23/X/;
121 my $fh;
122 if( $ONLINE ) {
123 my $request = GET $GENETHONURL. "/data_chrom$chrom";
124 my $response = $ua->request($request);
125 if( $response->is_success ) {
126 $fh = new IO::String($response->content);
127 } else {
128 warn(sprintf"Error: Request was %s error was %s",
129 $request->as_string(),
130 $response->error_as_HTML);
131 $fh = undef;
133 } else {
134 $fh = new IO::File("< $GENETHONDATA/data_chrom$chrom") or do {
135 warn("cannot open $GENETHONDATA/data_chrom$chrom");
136 $fh = undef;
139 return $fh;