fix file extension cleaning
[phenome.git] / bin / loading_scripts / LoadTomDelSGN.pm
blob2d36b89d2a1211b8a263a725b6c1c0c13d802e67
package LoadTomDelSGN;

# Loading script that attaches TomDel links to SGN loci.
#
# For every *.pdf file found in the input directory the file name is split on
# underscores into a gene model, a chromosome and a position, e.g.
#
#     Solyc01g005000.2_SL2.50ch01_14034.pdf
#
# The locus is looked up by name (for Solyc identifiers the ".N" version
# suffix is dropped first), a dbxref with the file name as accession is
# found or created under the 'TomDel' db, and the dbxref is attached to the
# locus.  With --trial the whole transaction is rolled back at the end.
#
# The class consumes MooseX::Runnable and MooseX::Getopt, so the attributes
# below double as command line options.

use Modern::Perl;
use CXGN::DB::InsertDBH;
use File::Slurp;
use File::Basename;
use File::Glob qw(bsd_glob);
use CXGN::Phenome::Locus;
use CXGN::Chado::Dbxref;    # instantiated directly in run(); load it explicitly
use Bio::Chado::Schema;

use Moose;
with 'MooseX::Runnable';
with 'MooseX::Getopt';

# Database handle; set by run(), not exposed as a command line option.
has "dbh" => (
    is       => 'rw',
    isa      => 'Ref',
    traits   => ['NoGetopt'],
    required => 0,
);

has "dbhost" => (
    is            => 'rw',
    isa           => 'Str',
    required      => 1,
    traits        => ['Getopt'],
    cmd_aliases   => 'H',
    documentation => 'required, database host',
);

has "dbname" => (
    is            => 'rw',
    isa           => 'Str',
    required      => 1,
    traits        => ['Getopt'],
    documentation => 'required, database name',
    cmd_aliases   => 'D',
);

has "dirname" => (
    is            => 'rw',
    isa           => 'Str',
    required      => 1,
    traits        => ['Getopt'],
    documentation => 'required, input directory name with TomDel files',
    cmd_aliases   => 'i',
);

has 'trial' => (
    is            => 'rw',
    isa           => 'Bool',
    required      => 0,
    default       => 0,
    traits        => ['Getopt'],
    cmd_aliases   => 't',
    documentation =>
        'Test run. Rollback the transaction.',
);

# run()
#
# Connects to the database named by dbname/dbhost, walks the *.pdf files in
# dirname and stores one locus_dbxref link per file whose locus exists and is
# not obsolete.  Progress is reported on STDERR.  Commits at the end unless
# trial is set, in which case the transaction is rolled back.
#
# Returns 0 (presumably consumed as the exit status by MooseX::Runnable --
# confirm against its documentation).
sub run {
    my ($self) = @_;

    my $dbh = CXGN::DB::InsertDBH->new(
        {
            dbname => $self->dbname,
            dbhost => $self->dbhost,
        }
    )->get_actual_dbh();

    #$dbh->{AutoCommit} = 1;
    $self->dbh($dbh);

    # Reuse the same handle for the schema so everything shares one transaction.
    my $schema = Bio::Chado::Schema->connect(
        sub { $dbh },
        { on_connect_do => ['SET search_path TO public;'] }
    );

    my $dirname = $self->dirname;

    # bsd_glob does not split the pattern on whitespace, so directory names
    # containing spaces are handled correctly.
    my @files = bsd_glob("$dirname/*.pdf");

    # NOTE(review): find_or_create searches on all three columns; if a
    # 'TomDel' row already exists with a different url/urlprefix this may
    # attempt a second insert -- confirm against the db table constraints.
    my $db = $schema->resultset("General::Db")->find_or_create(
        {
            name      => 'TomDel',
            urlprefix => 'https://',
            url       => 'solgenomics.net/ftp/TomDel-0.1/',
        }
    );

    my $counter = 0;

  FILE:
    foreach my $file (@files) {
        my $filename = basename($file);

        # Remove the extension.  The pattern is anchored at the end so a
        # ".pdf" occurring elsewhere in the name is left alone.
        ( my $clean_filename = $filename ) =~ s/\.pdf\z//;

        #Solyc01g005000.2_SL2.50ch01_14034.pdf
        #chromosome: SL2.50ch08, position: 1734498, link to genotype frequencies in tomato populations
        my ( $gene_model, $chr, $position ) = split /_/, $clean_filename;

        # Without all three fields the dbxref description would be junk.
        if ( grep { !defined $_ || $_ eq '' } $gene_model, $chr, $position ) {
            print STDERR "Skipping $filename: expected <gene model>_<chromosome>_<position>.pdf\n";
            next FILE;
        }

        my $sgn_locusname = $gene_model;
        if ( $gene_model =~ m/Solyc.*/ ) {
            # Tomato loci are stored without the ".N" version suffix.
            my ($tomato_locus_name) = split /\./, $gene_model;
            $sgn_locusname = $tomato_locus_name;
            print STDERR "Found tomato locus $sgn_locusname, $chr, $position\n";
        }

        my $locus    = CXGN::Phenome::Locus->new_with_locusname( $dbh, $sgn_locusname );
        my $locus_id = $locus ? $locus->get_locus_id : undef;

        if ( !$locus_id ) {
            print STDERR "No locus exists for ID $gene_model.\n";
            next FILE;
        }
        if ( ( $locus->get_obsolete // '' ) eq 't' ) {
            print STDERR "Locus $gene_model is obsolete. Skipping. \n";
            next FILE;
        }

        #add the link via dbxref
        #print STDERR "ADDING dbxref \n";
        my $dbxref = $db->find_or_create_related(
            'dbxrefs',
            {
                accession   => $filename,
                description => "chromosome $chr, position: $position",
            }
        );

        #chromosome: SL2.50ch08, position: 1734498, link to genotype frequencies in tomato populations
        my $dbxref_id     = $dbxref->dbxref_id();
        my $dbxref_object = CXGN::Chado::Dbxref->new( $dbh, $dbxref_id );
        $locus->add_locus_dbxref(
            $dbxref_object,
            undef,
            $locus->get_sp_person_id
        );
        $counter++;
        print STDERR "Added TomDel link dbxref_id=$dbxref_id , locus_id = $locus_id\n ";
    }

    print STDERR "Added $counter locus_dbxref links \n";

    if ( $self->trial ) {
        print "Trial mode! rolling back \n";
        $dbh->rollback;
    }
    else {
        print "COMMITING\n";
        $dbh->commit;
    }

    return 0;
}

1;