internationalization date support ongoing code walkthrough
[openemr.git] / contrib / util / load_icd_desc.plx
blob75607ee15db5dd22caf1583e745065518867c69c
1 #!/usr/bin/perl
2 use strict;
4 #######################################################################
5 # Copyright (C) 2007-2010 Rod Roark <rod@sunsetsystems.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
11 #######################################################################
12 # This loads ICD9 codes and descriptions into the "codes" table of
13 # OpenEMR, scraping from from www.icd9data.com.
14 # Alternatively you can just dump the INSERT statements to stdout.
15 #######################################################################
17 # You might need to install one or more of these dependencies.
18 # The Debian/Ubuntu package names are noted as comments:
20 use DBI; # libdbi-perl and libdbd-mysql-perl
21 use WWW::Mechanize; # libwww-mechanize-perl
22 use HTML::TokeParser; # libhtml-parser-perl
24 #######################################################################
25 # Parameters that you may customize #
26 #######################################################################
28 # Change this as needed for years other than 2010.
30 my $START_URL = "http://www.icd9data.com/2010/Volume1/default.htm";
32 # An empty database name will cause SQL INSERT statements to be dumped
33 # to stdout, with no database access. To update your OpenEMR database
34 # directly, specify its name here.
36 my $DBNAME = "";
38 # You can hard-code the database user name and password (see below),
39 # or else put them into the environment with bash commands like these
40 # before running this script:
42 # export DBI_USER=username
43 # export DBI_PASS=password
45 my $dbh = DBI->connect("dbi:mysql:dbname=$DBNAME") or die $DBI::errstr
46 if ($DBNAME);
48 # my $dbh = DBI->connect("dbi:mysql:dbname=$DBNAME", "username", "password")
49 # or die $DBI::errstr if ($DBNAME);
51 # Comment this out if you want to keep old nonmatching codes.
53 $dbh->do("delete from codes where code_type = 2") or die "oops"
54 if ($DBNAME);
56 #######################################################################
57 # Startup #
58 #######################################################################
60 $| = 1; # Turn on autoflushing of stdout.
62 my $countup = 0;
63 my $countnew = 0;
65 #######################################################################
66 # Main Logic #
67 #######################################################################
69 # This function recursively scrapes all of the web pages.
71 sub scrape {
72 my $url = shift;
74 my $browser = WWW::Mechanize->new();
75 $browser->get($url);
76 my $parser = HTML::TokeParser->new(\$browser->content());
78 while(my $tag = $parser->get_tag("li", "div")) {
80 # The <li><a> sequence is recognized as a link to another list
81 # that must be followed. We handle those recursively.
82 if ($tag->[0] eq "li") {
83 $tag = $parser->get_tag;
84 $tag = $parser->get_tag if ($tag->[0] eq "strong");
85 next unless ($tag->[0] eq "a");
86 my $nexturl = $browser->base();
87 # $nexturl =~ s'/[^/]+$'/';
88 $nexturl =~ s'/20.+$'';
89 scrape($nexturl . $tag->[1]{href});
92 # The <div><img> sequence starts an ICD9 code and description.
93 # If the "specific green" image is used then we know this code is
94 # valid as a specific diagnosis, and we will grab it.
95 else {
96 $tag = $parser->get_tag;
97 next unless ($tag->[0] eq "img");
98 next unless ($tag->[1]{src} =~ /SpecificGreen/);
99 $tag = $parser->get_tag("a");
100 my $tmp = $parser->get_trimmed_text;
101 unless ($tmp =~ /Diagnosis Code (\S+)/) {
102 print STDERR "Parse error in '$tmp' at $url\n";
103 next;
105 my $code = $1;
106 $tag = $parser->get_tag("div");
107 my $desc = $parser->get_trimmed_text;
108 $desc =~ s/'/''/g; # some descriptions will have quotes
110 # This creates the needed SQL statement, and optionally writes the
111 # code and its description to the codes table.
112 my $query = "INSERT INTO codes " .
113 "( code_type, code, modifier, code_text ) VALUES " .
114 "( 2, '$code', '', '$desc' )";
115 if ($DBNAME) {
116 my $usth = $dbh->prepare("SELECT id FROM codes " .
117 "WHERE code_type = 2 AND code = '$code'")
118 or die $dbh->errstr;
119 $usth->execute() or die $usth->errstr;
120 my @urow = $usth->fetchrow_array();
121 if (! @urow) {
122 ++$countnew;
124 else {
125 $query = "UPDATE codes SET code_text = '$desc' " .
126 "WHERE code_type = 2 AND code = '$code'";
127 ++$countup;
129 $dbh->do($query) or die $query;
132 print $query . ";\n";
137 # This starts the ball rolling.
138 scrape($START_URL);
140 #######################################################################
141 # Shutdown #
142 #######################################################################
144 if ($DBNAME) {
145 print "\nInserted $countnew rows, updated $countup codes.\n";
146 $dbh->disconnect;