cosmetic update
[openemr.git] / contrib / util / load_icd_desc.plx
blobc4157cc3d174fdb76b77a6f42fabf7a6c4d71b57
1 #!/usr/bin/perl
2 use strict;
4 #######################################################################
5 # Copyright (C) 2007 Rod Roark <rod@sunsetsystems.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
11 #######################################################################
12 # This loads ICD9 codes and descriptions into the "codes" table of
13 # OpenEMR, scraping from from www.icd9data.com.
14 # Alternatively you can just dump the INSERT statements to stdout.
15 #######################################################################
17 # You might need to install one or more of these dependencies.
18 # The Debian/Ubuntu package names are noted as comments:
20 use DBI; # libdbi-perl and libdbd-mysql-perl
21 use WWW::Mechanize; # libwww-mechanize-perl
22 use HTML::TokeParser; # libhtml-parser-perl
24 #######################################################################
25 # Parameters that you may customize #
26 #######################################################################
28 # Change this as needed for years other than 2008.
30 my $START_URL = "http://www.icd9data.com/2008/Volume1/default.htm";
32 # An empty database name will cause SQL INSERT statements to be dumped
33 # to stdout, with no database access. To update your OpenEMR database
34 # directly, specify its name here.
36 my $DBNAME = "";
38 # You can hard-code the database user name and password (see below),
39 # or else put them into the environment with bash commands like these
40 # before running this script:
42 # export DBI_USER=username
43 # export DBI_PASS=password
45 my $dbh = DBI->connect("dbi:mysql:dbname=$DBNAME") or die $DBI::errstr
46 if ($DBNAME);
48 # my $dbh = DBI->connect("dbi:mysql:dbname=$DBNAME", "username", "password")
49 # or die $DBI::errstr if ($DBNAME);
51 # Comment this out if you want to keep old nonmatching codes.
53 $dbh->do("delete from codes where code_type = 2") or die "oops"
54 if ($DBNAME);
56 #######################################################################
57 # Startup #
58 #######################################################################
60 $| = 1; # Turn on autoflushing of stdout.
62 my $countup = 0;
63 my $countnew = 0;
65 #######################################################################
66 # Main Logic #
67 #######################################################################
69 # This function recursively scrapes all of the web pages.
71 sub scrape {
72 my $url = shift;
74 my $browser = WWW::Mechanize->new();
75 $browser->get($url);
76 my $parser = HTML::TokeParser->new(\$browser->content());
78 while(my $tag = $parser->get_tag("li", "h1")) {
80 # The <li><a> sequence is recognized as a link to another list
81 # that must be followed. We handle those recursively.
82 if ($tag->[0] eq "li") {
83 $tag = $parser->get_tag;
84 next unless ($tag->[0] eq "a");
85 my $nexturl = $browser->base();
86 $nexturl =~ s'/[^/]+$'/';
87 scrape($nexturl . $tag->[1]{href});
90 # The <h1><img> sequence starts an ICD9 code and description.
91 # If the "specific green" image is used then we know this code is
92 # valid as a specific diagnosis, and we will grab it.
93 else {
94 $tag = $parser->get_tag;
95 next unless ($tag->[0] eq "img");
96 next unless ($tag->[1]{src} =~ /SpecificGreen/);
97 $tag = $parser->get_tag;
98 next unless ($tag->[0] eq "a");
99 my $tmp = $parser->get_trimmed_text;
100 unless ($tmp =~ /Diagnosis (\S+)/) {
101 print STDERR "Parse error in '$tmp' at $url\n";
102 next;
104 my $code = $1;
105 $tag = $parser->get_tag("h2", "h1");
106 die "Parse error: <h2> missing at $url\n" unless ($tag->[0] eq "h2");
107 my $desc = $parser->get_trimmed_text;
108 $desc =~ s/'/''/g; # some descriptions will have quotes
110 # This creates the needed SQL statement, and optionally writes the
111 # code and its description to the codes table.
112 my $query = "INSERT INTO codes " .
113 "( code_type, code, modifier, code_text ) VALUES " .
114 "( 2, '$code', '', '$desc' )";
115 if ($DBNAME) {
116 my $usth = $dbh->prepare("SELECT id FROM codes " .
117 "WHERE code_type = 2 AND code = '$code'")
118 or die $dbh->errstr;
119 $usth->execute() or die $usth->errstr;
120 my @urow = $usth->fetchrow_array();
121 if (! @urow) {
122 ++$countnew;
124 else {
125 $query = "UPDATE codes SET code_text = '$desc' " .
126 "WHERE code_type = 2 AND code = '$code'";
127 ++$countup;
129 $dbh->do($query) or die $query;
132 print $query . ";\n";
137 # This starts the ball rolling.
138 scrape($START_URL);
140 #######################################################################
141 # Shutdown #
142 #######################################################################
144 if ($DBNAME) {
145 print "\nInserted $countnew rows, updated $countup codes.\n";
146 $dbh->disconnect;