4 #######################################################################
5 # Copyright (C) 2007 Rod Roark <rod@sunsetsystems.com>
7 # This program is free software; you can redistribute it and/or
8 # modify it under the terms of the GNU General Public License
9 # as published by the Free Software Foundation; either version 2
10 # of the License, or (at your option) any later version.
11 #######################################################################
12 # This loads ICD9 codes and descriptions into the "codes" table of
13 # OpenEMR, scraping from from www.icd9data.com.
14 # Alternatively you can just dump the INSERT statements to stdout.
15 #######################################################################
17 # You might need to install one or more of these dependencies.
18 # The Debian/Ubuntu package names are noted as comments:
20 use DBI
; # libdbi-perl and libdbd-mysql-perl
21 use WWW
::Mechanize
; # libwww-mechanize-perl
22 use HTML
::TokeParser
; # libhtml-parser-perl
24 #######################################################################
25 # Parameters that you may customize #
26 #######################################################################
28 # Change this as needed for years other than 2008.
30 my $START_URL = "http://www.icd9data.com/2008/Volume1/default.htm";
32 # An empty database name will cause SQL INSERT statements to be dumped
33 # to stdout, with no database access. To update your OpenEMR database
34 # directly, specify its name here.
38 # You can hard-code the database user name and password (see below),
39 # or else put them into the environment with bash commands like these
40 # before running this script:
42 # export DBI_USER=username
43 # export DBI_PASS=password
45 my $dbh = DBI
->connect("dbi:mysql:dbname=$DBNAME") or die $DBI::errstr
48 # my $dbh = DBI->connect("dbi:mysql:dbname=$DBNAME", "username", "password")
49 # or die $DBI::errstr if ($DBNAME);
51 # Comment this out if you want to keep old nonmatching codes.
53 $dbh->do("delete from codes where code_type = 2") or die "oops"
56 #######################################################################
58 #######################################################################
60 $| = 1; # Turn on autoflushing of stdout.
65 #######################################################################
67 #######################################################################
69 # This function recursively scrapes all of the web pages.
74 my $browser = WWW
::Mechanize
->new();
76 my $parser = HTML
::TokeParser
->new(\
$browser->content());
78 while(my $tag = $parser->get_tag("li", "h1")) {
80 # The <li><a> sequence is recognized as a link to another list
81 # that must be followed. We handle those recursively.
82 if ($tag->[0] eq "li") {
83 $tag = $parser->get_tag;
84 next unless ($tag->[0] eq "a");
85 my $nexturl = $browser->base();
86 $nexturl =~ s
'/[^/]+$'/';
87 scrape($nexturl . $tag->[1]{href});
90 # The <h1><img> sequence starts an ICD9 code and description.
91 # If the "specific green" image is used then we know this code is
92 # valid as a specific diagnosis, and we will grab it.
94 $tag = $parser->get_tag;
95 next unless ($tag->[0] eq "img");
96 next unless ($tag->[1]{src} =~ /SpecificGreen/);
97 $tag = $parser->get_tag;
98 next unless ($tag->[0] eq "a");
99 my $tmp = $parser->get_trimmed_text;
100 unless ($tmp =~ /Diagnosis (\S+)/) {
101 print STDERR "Parse error in '$tmp' at $url\n";
105 $tag = $parser->get_tag("h2", "h1");
106 die "Parse error: <h2> missing at $url\n" unless ($tag->[0] eq "h2");
107 my $desc = $parser->get_trimmed_text;
108 $desc =~ s/'/''/g; # some descriptions will have quotes
110 # This creates the needed SQL statement, and optionally writes the
111 # code and its description to the codes table.
112 my $query = "INSERT INTO codes " .
113 "( code_type, code, modifier, code_text ) VALUES " .
114 "( 2, '$code', '', '$desc' )";
116 my $usth = $dbh->prepare("SELECT id FROM codes " .
117 "WHERE code_type = 2 AND code = '$code'")
119 $usth->execute() or die $usth->errstr;
120 my @urow = $usth->fetchrow_array();
125 $query = "UPDATE codes SET code_text = '$desc' " .
126 "WHERE code_type = 2 AND code = '$code'";
129 $dbh->do($query) or die $query;
132 print $query . ";\n";
137 # This starts the ball rolling.
140 #######################################################################
142 #######################################################################
145 print "\nInserted $countnew rows, updated $countup codes.\n";